In [3]:
import pickle
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

# Load the loan dataset from a local pickle file (presumably already filtered,
# going by the file name — confirm against the notebook that produced it).
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so
# this file must only ever come from a trusted source.
with open('dataset/filtered_df.pkl', 'rb') as f:
    df = pickle.load(f)  
# Bare trailing expression so the notebook renders the loaded frame.
df

Unnamed: 0,Employment_Status,Annual_Income,Credit_Score,Outstanding_Debt,Loan_Amount_Requested,Loan_Type,Loan_Approval_Status
0,Employed,139901,743,19822,24535,Secured,1
1,Employed,21162,468,18321,8288,Unsecured,0
4,Employed,81753,752,8780,23360,Unsecured,1
8,Self-Employed,121626,803,13940,20239,Secured,0
10,Self-Employed,54670,772,22104,20000,Unsecured,1
...,...,...,...,...,...,...,...
51986,Self-Employed,105630,734,14841,26616,Unsecured,1
51989,Unemployed,118085,714,9803,30183,Secured,0
51990,Self-Employed,134581,756,15172,24717,Unsecured,1
51992,Unemployed,34019,572,19031,8383,Unsecured,0


In [4]:
# Column-wise maxima of annual income, requested loan amount and outstanding debt.
# NOTE(review): none of these three names is read by the cells shown in this
# notebook (get_fuzzy_sets looks the maxima up itself) — confirm before removing.
max_income = df['Annual_Income'].max()
max_loan = df['Loan_Amount_Requested'].max()
max_outstanding_debt = df['Outstanding_Debt'].max()

def calculate_cutoffs(data, column_name):
    """
    Derives the quantile anchors used to place fuzzy membership functions on a column.

    Parameters:
    - data: The DataFrame containing the data.
    - column_name: Name of the column whose distribution is summarised.

    Returns:
    - A dictionary with the keys "low_cutoff" (median), "medium_start"
      (25th percentile), "medium_middle" (median), "medium_end"
      (75th percentile) and "max_data" (column maximum), in that order.
    """
    column = data[column_name]

    # (key, quantile) pairs, listed in the order the keys must appear in the result
    quantile_by_key = (
        ("low_cutoff", 0.5),
        ("medium_start", 0.25),
        ("medium_middle", 0.5),
        ("medium_end", 0.75),
    )

    cutoffs = {key: column.quantile(q) for key, q in quantile_by_key}
    cutoffs["max_data"] = column.max()
    return cutoffs


def membership_function(antecedent, cutoffs):
    """
    Attaches triangular 'low', 'medium' and 'high' terms to a fuzzy variable.

    Parameters:
    - antecedent: The fuzzy variable that receives the terms; it must expose
      `.universe` and accept assignment by term name.
    - cutoffs: A dictionary with the keys "low_cutoff", "medium_start",
      "medium_middle", "medium_end" and "max_data".
    """
    universe = antecedent.universe
    top = cutoffs["max_data"]

    # [left foot, peak, right foot] of each triangle
    triangles = {
        'low': [0, 0, cutoffs["low_cutoff"]],
        'medium': [cutoffs["medium_start"], cutoffs["medium_middle"], cutoffs["medium_end"]],
        'high': [cutoffs["medium_middle"], top, top],
    }

    for term, abc in triangles.items():
        antecedent[term] = fuzz.trimf(universe, abc)


def get_fuzzy_sets(df, step=1000):
    """
    Builds the fuzzy variables for 'Annual_Income', 'Loan_Amount_Requested'
    and 'Outstanding_Debt', each with 'low', 'medium' and 'high' terms.

    Parameters:
    - df: The dataframe containing the data to base the fuzzy sets on.
    - step: Spacing of the universe grid (defaults to 1000, the value that was
      previously hard-coded). Must be positive.

    Returns:
    - annual_income, loan_amount_requested, outstanding_debt, in that order.
      annual_income and outstanding_debt are Antecedents; loan_amount_requested
      is a Consequent labelled 'result'.

    Raises:
    - ValueError: if step is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    def build_universe(column_name):
        # Grid from 0 up to and including the column maximum.
        #
        # np.arange(0, max + 1, step) ends at the last multiple of `step` below
        # the maximum, so the largest observations fell outside the universe.
        # skfuzzy's interp_membership returns 0 outside the universe by default,
        # which left those rows with zero membership in every term. Appending
        # the exact maximum keeps every observed value inside the universe and
        # puts the peak of the 'high' triangle on a grid point.
        column_max = df[column_name].max()
        return np.append(np.arange(0, column_max, step), column_max)

    # Create fuzzy variables for each column
    annual_income = ctrl.Antecedent(build_universe('Annual_Income'), 'Annual_Income')
    outstanding_debt = ctrl.Antecedent(build_universe('Outstanding_Debt'), 'Outstanding_Debt')
    # The 'result' label is kept as-is; other code may look the variable up by it.
    loan_amount_requested = ctrl.Consequent(build_universe('Loan_Amount_Requested'), 'result')

    # Apply the quantile-based membership functions
    membership_function(annual_income, calculate_cutoffs(df, "Annual_Income"))
    membership_function(loan_amount_requested, calculate_cutoffs(df, "Loan_Amount_Requested"))
    membership_function(outstanding_debt, calculate_cutoffs(df, "Outstanding_Debt"))

    return annual_income, loan_amount_requested, outstanding_debt

def print_fuzzy_object_details(fuzzy_object):
    """
    Writes a short report about a fuzzy variable to standard output: its label,
    its universe, and the membership array of every term.

    Parameters:
    - fuzzy_object: The fuzzy variable to inspect (Antecedent or Consequent);
      it must expose `.label`, `.universe` and a `.terms` mapping whose values
      have an `.mf` attribute.
    """
    print(f"Name: {fuzzy_object.label}")
    print("Universe:", fuzzy_object.universe)
    print("Membership Functions:")

    terms = fuzzy_object.terms
    for name in terms:
        # One indented line per term, in the mapping's own order
        print(f"  {name}: {terms[name].mf}")


# Example usage: build the three fuzzy variables from the loaded data.
# These module-level names are read later by calculate_categorical_result,
# so this cell has to run before that function is applied.
annual_income, loan_amount_requested, outstanding_debt = get_fuzzy_sets(df)

# Print the income variable's label, universe and membership arrays.
print("Details for Annual Income:")
print_fuzzy_object_details(annual_income)

Details for Annual Income:
Name: Annual_Income
Universe: [     0   1000   2000   3000   4000   5000   6000   7000   8000   9000
  10000  11000  12000  13000  14000  15000  16000  17000  18000  19000
  20000  21000  22000  23000  24000  25000  26000  27000  28000  29000
  30000  31000  32000  33000  34000  35000  36000  37000  38000  39000
  40000  41000  42000  43000  44000  45000  46000  47000  48000  49000
  50000  51000  52000  53000  54000  55000  56000  57000  58000  59000
  60000  61000  62000  63000  64000  65000  66000  67000  68000  69000
  70000  71000  72000  73000  74000  75000  76000  77000  78000  79000
  80000  81000  82000  83000  84000  85000  86000  87000  88000  89000
  90000  91000  92000  93000  94000  95000  96000  97000  98000  99000
 100000 101000 102000 103000 104000 105000 106000 107000 108000 109000
 110000 111000 112000 113000 114000 115000 116000 117000 118000 119000
 120000 121000 122000 123000 124000 125000 126000 127000 128000 129000
 130000 131000 13200

In [5]:


# # Rule 1: Low income and low debt result in a low loan amount
# rule1 = ctrl.Rule(annual_income['low'] & outstanding_debt['low'], result['low'])

# # Rule 2: Medium income and low debt result in a medium loan amount
# rule2 = ctrl.Rule(annual_income['medium'] & outstanding_debt['low'], result['medium'])

# # Rule 3: High income and low debt result in a high loan amount
# rule3 = ctrl.Rule(annual_income['high'] & outstanding_debt['low'], result['high'])

# # Rule 4: Low income and medium debt result in a low loan amount
# rule4 = ctrl.Rule(annual_income['low'] & outstanding_debt['medium'], result['low'])

# # Rule 5: Medium income and medium debt result in a medium loan amount
# rule5 = ctrl.Rule(annual_income['medium'] & outstanding_debt['medium'], result['medium'])

# # Rule 6: High income and medium debt result in a high loan amount
# rule6 = ctrl.Rule(annual_income['high'] & outstanding_debt['medium'], result['high'])

# # Rule 7: Low income and high debt result in a low loan amount
# rule7 = ctrl.Rule(annual_income['low'] & outstanding_debt['high'], result['low'])

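# # Rule 8: Medium income and high debt result in a medium loan amount
# # NOTE(review): the consequent below is 'low', which contradicts the description
# # above and the 'medium' grouping that calculate_categorical_result uses for Rule 8.
# rule8 = ctrl.Rule(annual_income['medium'] & outstanding_debt['high'], result['low'])

# # Rule 9: High income and high debt result in a high loan amount
# # NOTE(review): the consequent below is 'medium', which contradicts the description
# # above and the 'high' grouping that calculate_categorical_result uses for Rule 9.
# rule9 = ctrl.Rule(annual_income['high'] & outstanding_debt['high'], result['medium'])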

# from itertools import product

# # Define the groups of words
# parenthesis_groups = [
#     ["employment_status_unemployed", "employment_status_selfemployed", "employment_status_employed"],
#     ["annual_income_low", "annual_income_medium", "annual_income_high"],
#     ["credit_score_low", "credit_score_medium", "credit_score_high"],
#     ["outstanding_debt_low", "outstanding_debt_medium", "outstanding_debt_high"],
#     ["loan_type_secured", "loan_type_unsecured"]
# ]

# # Find every combination of one word from each parenthesis
# combinations = list(product(*parenthesis_groups))

# # Print all combinations
# for combination in combinations:
#     print(combination)

# # Count total combinations for reference
# len(combinations)



In [6]:
# Classify a single row into (requested-amount category, eligibility category)
def calculate_categorical_result(row):
    """
    Maps one applicant row to two fuzzy categories.

    Reads the module-level fuzzy variables annual_income, outstanding_debt and
    loan_amount_requested.

    Parameters:
    - row: A row exposing 'Annual_Income', 'Outstanding_Debt' and
      'Loan_Amount_Requested'.

    Returns:
    - (fuzzy_request_category, result_category), each one of 'low', 'medium'
      or 'high'. The first is the term in which the requested amount has the
      highest membership. The second is the income term with the highest rule
      activation, where each activation is the income membership combined
      (min) with each debt membership and then maximised over the debt terms.
      Ties resolve to the earliest of 'low', 'medium', 'high'.
    """
    levels = ('low', 'medium', 'high')

    def fuzzify(variable, value):
        # Membership degree of `value` in every term of `variable`, keyed by term name
        return {
            level: fuzz.interp_membership(variable.universe, variable[level].mf, value)
            for level in levels
        }

    income_degrees = fuzzify(annual_income, row['Annual_Income'])
    debt_degrees = fuzzify(outstanding_debt, row['Outstanding_Debt'])
    request_degrees = fuzzify(loan_amount_requested, row['Loan_Amount_Requested'])

    # Rules 1-9: for each income term, AND (min) it with every debt term and
    # OR (max) the three results together
    rule_activations = {
        level: np.fmax.reduce([
            np.fmin(income_degrees[level], debt_degrees[debt_level])
            for debt_level in levels
        ])
        for level in levels
    }

    # Strongest eligibility rule group, and strongest request term
    result_category = max(rule_activations, key=rule_activations.get)
    fuzzy_request_category = max(request_degrees, key=request_degrees.get)
    return fuzzy_request_category, result_category

# Classify every row; result_type='expand' spreads each returned
# (request, eligibility) tuple across two columns, which are then stored on df
df[['Fuzzy_Request', 'Fuzzy_Loan_Eligibility']] = df.apply(
    calculate_categorical_result, axis=1, result_type='expand'
)

# Show the first rows including the two new fuzzy columns
print(df.head())


   Employment_Status  Annual_Income  Credit_Score  Outstanding_Debt  \
0           Employed         139901           743             19822   
1           Employed          21162           468             18321   
4           Employed          81753           752              8780   
8      Self-Employed         121626           803             13940   
10     Self-Employed          54670           772             22104   

    Loan_Amount_Requested  Loan_Type  Loan_Approval_Status Fuzzy_Request  \
0                   24535    Secured                     1        medium   
1                    8288  Unsecured                     0           low   
4                   23360  Unsecured                     1        medium   
8                   20239    Secured                     0        medium   
10                  20000  Unsecured                     1        medium   

   Fuzzy_Loan_Eligibility  
0                    high  
1                     low  
4                  medium  
8   

In [7]:
# Decide approval from the two fuzzy categories of a row
def calculate_fuzzy_approval_status(row):
    """
    Derives the Fuzzy Approval Status from 'Fuzzy_Request' and 'Fuzzy_Loan_Eligibility'.

    Parameters:
    - row: A row of the DataFrame containing 'Fuzzy_Request' and 'Fuzzy_Loan_Eligibility'.

    Returns:
    - 1 (approved) when the request is "low", when eligibility is "high", or
      when eligibility is "medium" and the request is "medium" or "high";
      0 (not approved) in every other case, including unrecognised values.
    """
    requested = row['Fuzzy_Request']
    eligibility = row['Fuzzy_Loan_Eligibility']

    # A low request is always granted, and high eligibility covers any request
    if requested == "low" or eligibility == "high":
        return 1

    # Medium eligibility covers medium and high requests
    if eligibility == "medium" and requested in ("medium", "high"):
        return 1

    # Low eligibility with a larger request, or any unhandled combination
    return 0

# Derive the fuzzy approval decision for every row and store it on df
df['Fuzzy_Approval_Status'] = df.apply(calculate_fuzzy_approval_status, axis=1)

# Display the updated DataFrame
print(df.head())

# Count the rows where the fuzzy decision agrees with the recorded decision
matching_rows_count = (df['Loan_Approval_Status'] == df['Fuzzy_Approval_Status']).sum()

print(f"Number of matching rows: {matching_rows_count}")
# Summing the Loan_Approval_Status flag (0/1 in the rows displayed above)
# counts the approved applications. The previous label, "applicants for home
# loans", described something this number is not.
print(f"Number of approved applications in the data: {df['Loan_Approval_Status'].sum()}")


   Employment_Status  Annual_Income  Credit_Score  Outstanding_Debt  \
0           Employed         139901           743             19822   
1           Employed          21162           468             18321   
4           Employed          81753           752              8780   
8      Self-Employed         121626           803             13940   
10     Self-Employed          54670           772             22104   

    Loan_Amount_Requested  Loan_Type  Loan_Approval_Status Fuzzy_Request  \
0                   24535    Secured                     1        medium   
1                    8288  Unsecured                     0           low   
4                   23360  Unsecured                     1        medium   
8                   20239    Secured                     0        medium   
10                  20000  Unsecured                     1        medium   

   Fuzzy_Loan_Eligibility  Fuzzy_Approval_Status  
0                    high                      1  
1             