In [86]:
import pandas as pd 
import numpy as np 
import os
from datetime import datetime, timedelta
import random

In [87]:
base_dir = os.path.dirname(os.path.abspath('data.ipynb'))
credit_case_study_folder = os.path.join(base_dir, 'Credit_EDA_case_study')
application_file_name = 'application_data.csv'

credit_case_study_data = pd.read_csv(os.path.join(credit_case_study_folder,application_file_name))
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
5,100008,0,Cash loans,M,N,Y,0,99000.0,490495.5,27517.5,...,0,0,0,0,0.0,0.0,0.0,0.0,1.0,1.0
6,100009,0,Cash loans,F,Y,Y,1,171000.0,1560726.0,41301.0,...,0,0,0,0,0.0,0.0,0.0,1.0,1.0,2.0
7,100010,0,Cash loans,M,Y,Y,0,360000.0,1530000.0,42075.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,100011,0,Cash loans,F,N,Y,0,112500.0,1019610.0,33826.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
9,100012,0,Revolving loans,M,N,Y,0,135000.0,405000.0,20250.0,...,0,0,0,0,,,,,,


In [88]:
credit_case_study_data = credit_case_study_data[[
    'SK_ID_CURR', 'DAYS_BIRTH', 'CODE_GENDER', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'AMT_INCOME_TOTAL'
]]
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,DAYS_BIRTH,CODE_GENDER,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,AMT_INCOME_TOTAL
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [89]:
rename_dict = {
    'SK_ID_CURR' : 'Customer ID', 
    'DAYS_BIRTH': 'Customer Age', 
    'CODE_GENDER': 'Gender', 
    'NAME_EDUCATION_TYPE': 'Education', 
    'NAME_FAMILY_STATUS': 'Marital Status', 
    'AMT_INCOME_TOTAL': 'Income Category'
}

credit_case_study_data = credit_case_study_data.rename(columns=rename_dict)
credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [90]:
credit_case_study_data['Customer Age'] = abs(credit_case_study_data['Customer Age']/365).astype(int)

credit_case_study_data['Income Numeric'] = credit_case_study_data['Income Category']
income_bins = [0, 40000, 60000, 80000, 100000, 120000, float('inf')]  
income_labels = ['Less than $40k', '$40k - $60k', '$60k to $80k', '$80k to $100k', '$100k to $120k', '$120k+']
credit_case_study_data['Income Category'] = pd.cut(credit_case_study_data['Income Category'], bins=income_bins, labels=income_labels, right=False)

education_mapping = {
    'Academic degree': 'College',
    'Higher education': 'Graduate',
    'Incomplete higher': 'College',
    'Lower secondary': 'Junior High School',
    'Secondary / secondary special': 'High School'
}

# Apply mapping to the 'Education' column
credit_case_study_data['Education'] = credit_case_study_data['Education'].map(education_mapping)

credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
0,100002,25,M,High School,Single / not married,$120k+,202500.0
1,100003,45,F,Graduate,Married,$120k+,270000.0
2,100004,52,M,High School,Single / not married,$60k to $80k,67500.0
3,100006,52,F,High School,Civil marriage,$120k+,135000.0
4,100007,54,M,High School,Single / not married,$120k+,121500.0
5,100008,46,M,High School,Married,$80k to $100k,99000.0
6,100009,37,F,Graduate,Married,$120k+,171000.0
7,100010,51,M,Graduate,Married,$120k+,360000.0
8,100011,55,F,High School,Married,$100k to $120k,112500.0
9,100012,39,M,High School,Single / not married,$120k+,135000.0


In [91]:
filtered_data = credit_case_study_data[
    (credit_case_study_data['Customer Age'] < 25) &
    (credit_case_study_data['Education'] == 'High School') &
    (credit_case_study_data['Marital Status'] == 'Single / not married') 
    # (credit_case_study_data['Income Category'] == '$40k to 60k')
]

filtered_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
15,100019,23,M,High School,Single / not married,$120k+,157500.0
163,100192,21,F,High School,Single / not married,$100k to $120k,111915.0
228,100265,22,F,High School,Single / not married,$100k to $120k,112500.0
297,100342,23,M,High School,Single / not married,$80k to $100k,90000.0
351,100405,23,M,High School,Single / not married,$120k+,180000.0
430,100497,24,M,High School,Single / not married,$120k+,135000.0
649,100741,23,M,High School,Single / not married,$80k to $100k,90000.0
670,100775,22,M,High School,Single / not married,$100k to $120k,112500.0
758,100871,21,M,High School,Single / not married,$80k to $100k,90288.0
767,100881,21,M,High School,Single / not married,$120k+,243000.0


In [92]:
customerB = credit_case_study_data[
    (credit_case_study_data['Customer ID'] ==  100871) 
]

customerB['Income Category'] = '$40k - $60k'
customerB['Income Numeric'] = 46000

customerB.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerB['Income Category'] = '$40k - $60k'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerB['Income Numeric'] = 46000


Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
758,100871,21,M,High School,Single / not married,$40k - $60k,46000


In [93]:
customerB['Snapshot Month'] = pd.to_datetime('2016-3-31')
customerB['Month on Book'] = 1
customerB['Credit_Limit'] = (customerB['Income Numeric'] * 0.10).round(-3)

start_date = pd.to_datetime('2016-3-31')
end_date = pd.to_datetime('2017-12-31')
dates = pd.date_range(start=start_date, end=end_date, freq='M')

# Create the new DataFrame with time series data
time_series_customerB = []

# Iterate through the dates to generate the rows
for date in dates:
    # Copy the customer data for each row
    new_row = customerB.copy()
    
    # Update snapshot month to the current date
    new_row['Snapshot Month'] = date
    
    # Update Customer Age: +1 every year
    new_row['Customer Age'] = customerB['Customer Age'] + (date.year - start_date.year)
    
    # Update MoB: +1 for each row
    new_row['Month on Book'] = customerB['Month on Book'] + (date.year - start_date.year) * 12 + (date.month - start_date.month)
    
    # Append the new row to the list
    time_series_customerB.append(new_row)
    
# Concatenate all rows into a single DataFrame
customerB = pd.concat(time_series_customerB, ignore_index=True)

customerB.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerB['Snapshot Month'] = pd.to_datetime('2016-3-31')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerB['Month on Book'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerB['Credit_Limit'] = (customerB['Income Numeric'] * 0.10).round(-3)
  dates = pd.date_range(start=start_date,

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0


In [94]:
initial_fico = 690  
min_utilization = 0.17  
max_utilization = 0.23
risk_start_date = pd.to_datetime('2017-03-31')  # Starting point for Delinquency
max_peak_utilization = 0.99  # Peak utilization during Delinquency period
fico_deterioration_rate = 5  # Deterioration in FICO score per 10% increase in utilization
fico_fluctuation_rate = 1
external_bank_credit_card_max_util_greater_than_50_base = 1
external_bank_credit_card_max_util_greater_than_90_base = 0
credit_inquiy_base = 1


# Function to adjust FICO based on utilization
def calculate_fico(utilization, fico_score):
    # FICO deteriorates as utilization increases
    if utilization > 0.20:
        fico_deduction = fico_deterioration_rate * ((utilization - 0.20) // 0.10)
        fico_score = max(fico_score - fico_deduction, 300)  # FICO score can't go below 300
    return fico_score

# Function to adjust FICO based on utilization
def fluctuate_fico(utilization, fico_score):
    fico_deduction = fico_fluctuation_rate * ((utilization - 0.20) // 0.025)
    fico_score = max(fico_score - fico_deduction, 300) 
    return fico_score

# Function to generate monthly utilization and FICO
def generate_utilization_fico(df):
    fico_scores = []
    utilizations = []
    external_bank_credit_card_max_util_greater_than_50 = []
    external_bank_credit_card_max_util_greater_than_90 = []
    credit_inquiries = []
    
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        
        # For March 2016 to March 2017, utilization is around 20%
        if snapshot_date < risk_start_date:
            utilization = random.uniform(min_utilization, max_utilization)
            fico_score = fluctuate_fico(utilization, initial_fico)
            credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base
            credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base
            credit_inquiry = credit_inquiy_base
        else:
            # From March 2017 onwards, use exponential increase for utilization
            months_since_march_2017 = (snapshot_date - risk_start_date).days // 30
            
            if months_since_march_2017 <= 2:
                # First 2 months: 10% increase
                utilization = 0.20 + 0.10 * months_since_march_2017 + random.uniform(0.01,0.02)  # Cap utilization at 30%
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 1
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 1
                credit_inquiry = credit_inquiy_base + 2

            elif 3 <= months_since_march_2017 <= 4:
                # Months 3-5: Steep increase (more than 30% increase)
                utilization = min(0.30 + 0.30 * (months_since_march_2017 - 2) + random.uniform(0.01,0.02), 0.90)  # Cap at 90%
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 2
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 2
                credit_inquiry = credit_inquiy_base + 2

            else:
                # After reaching 90%, slow increase by 1% per month
                utilization = min(0.90 + 0.01 * (months_since_march_2017 - 5) + random.uniform(0.005,0.009), max_peak_utilization)
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 2
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 3
                credit_inquiry = credit_inquiy_base + 3
                
            fico_score = calculate_fico(utilization, initial_fico)

        utilizations.append(utilization)
        fico_scores.append(fico_score)
        external_bank_credit_card_max_util_greater_than_50.append(credit_card_max_util_greater_than_50)
        external_bank_credit_card_max_util_greater_than_90.append(credit_card_max_util_greater_than_90)
        credit_inquiries.append(credit_inquiry)
    
    df['Utilization'] = utilizations
    df['FICO'] = fico_scores
    df['external_bank_credit_card_max_util_greater_than_50'] = external_bank_credit_card_max_util_greater_than_50
    df['external_bank_credit_card_max_util_greater_than_90'] = external_bank_credit_card_max_util_greater_than_90
    df['Credit_Inquiries'] = credit_inquiries
    return df

# Generate the utilization and FICO for the customerB DataFrame
customerB = generate_utilization_fico(customerB)

customerB.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,0.199797,691.0,1,0,1
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,0.210749,690.0,1,0,1
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,0.224826,690.0,1,0,1
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,0.213212,690.0,1,0,1
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,0.200085,690.0,1,0,1
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,0.187928,691.0,1,0,1
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,0.174629,692.0,1,0,1
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,0.209946,690.0,1,0,1
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,0.194183,691.0,1,0,1
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,0.227092,689.0,1,0,1


In [95]:
customerB = customerB.sort_values(by='Month on Book', ascending=False)

customerB['Delinquency'] = 0
max_Delinquency = 7
for i in range(max_Delinquency + 1):
    if i < len(customerB):
        customerB.iloc[i, customerB.columns.get_loc('Delinquency')] = max_Delinquency - i

pre_risk_data = customerB[(customerB['Snapshot Month'] < risk_start_date) & (customerB['Month on Book']>=3)]
if not pre_risk_data.empty:
    highest_utilization_index = pre_risk_data['Utilization'].idxmax()
    # Set the Delinquency of the highest utilization row to 1
    customerB.at[highest_utilization_index, 'Delinquency'] = 1

customerB = customerB.sort_values(by='Snapshot Month').reset_index(drop=True)

In [96]:
revolving_bal_percent = 0.85 + random.uniform(-0.02, 0.02)
customerB['Revolving_Bal'] = revolving_bal_percent * customerB['Utilization'] * customerB['Credit_Limit']

revolving_balance_delinquency_3 = customerB.loc[customerB['Delinquency'] == 3, 'Revolving_Bal'].iloc[0]
customerB.loc[customerB['Delinquency'] > 3, 'Revolving_Bal'] = revolving_balance_delinquency_3


customerB.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,0.199797,691.0,1,0,1,0,843.384521
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,0.210749,690.0,1,0,1,0,889.614333
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,0.224826,690.0,1,0,1,0,949.035188
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,0.213212,690.0,1,0,1,0,900.011104
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,0.200085,690.0,1,0,1,0,844.599156
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,0.187928,691.0,1,0,1,0,793.284419
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,0.174629,692.0,1,0,1,0,737.14325
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,0.209946,690.0,1,0,1,0,886.225825
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,0.194183,691.0,1,0,1,0,819.685038
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,0.227092,689.0,1,0,1,1,958.602223


In [97]:
initial_debt = random.uniform(31000, 33000)  # Start value around 32000
debt_reduction = random.uniform(2400, 2600)  # Monthly reduction around 2500

# Calculate Total_Debt for each month
total_debt_values = []

for i in range(len(customerB)):
    if i == 0:
        # Initialize the first row's Total Debt
        total_debt = initial_debt
    else:
        # Get previous month's Total Debt
        total_debt = total_debt_values[-1]
        
        # If Delinquency is 0, add revolving balance based on external_bank_credit_card_max_util_greater_than_50
        # if customerB.loc[i, 'Delinquency'] == 0:
        total_debt = total_debt  - debt_reduction
        multiplier = min(customerB.loc[i, 'external_bank_credit_card_max_util_greater_than_50']+1, 3)
        total_debt = total_debt + customerB.loc[i, 'Revolving_Bal'] * multiplier
        # else:
        #     multiplier = min(customerB.loc[i, 'external_bank_credit_card_max_util_greater_than_50']+1, 3)
        #     total_debt = total_debt + customerB.loc[i, 'Revolving_Bal'] * multiplier
    
    total_debt_values.append(total_debt)

# Assign the computed Total_Debt values to the DataFrame
customerB['Total_Debt'] = total_debt_values
customerB['Debt_to_Income_Ratio'] = (
    np.random.uniform(500, 600, size=len(customerB)) + 
    np.minimum(customerB['external_bank_credit_card_max_util_greater_than_50'] + 1, 3) * customerB['Revolving_Bal']
) / (customerB['Income Numeric'] / 12)

customerB.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,0.199797,691.0,1,0,1,0,843.384521,32932.018556,0.583597
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,0.210749,690.0,1,0,1,0,889.614333,32242.92844,0.620058
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,0.224826,690.0,1,0,1,0,949.035188,31672.680033,0.63847
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,0.213212,690.0,1,0,1,0,900.011104,31004.38346,0.605092
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,0.200085,690.0,1,0,1,0,844.599156,30225.26299,0.573843
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,0.187928,691.0,1,0,1,0,793.284419,29343.513047,0.551622
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,0.174629,692.0,1,0,1,0,737.14325,28349.480765,0.536075
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,0.209946,690.0,1,0,1,0,886.225825,27653.613634,0.597154
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,0.194183,691.0,1,0,1,0,819.685038,26824.664928,0.575721
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,0.227092,689.0,1,0,1,1,958.602223,26273.550591,0.636783


In [98]:
interchange_fee_factor = 0.02
interest_rate_monthly = 24.61 / 100 / 12

# Calculate Interchange Fee for all rows initially
customerB['Interchange Fee'] = customerB.apply(
    lambda row: row['Utilization'] * row['Credit_Limit'] * interchange_fee_factor, axis=1
)

# Get the last 7 rows based on Snapshot Month ascending order
last_7_indices = customerB.sort_values(by='Snapshot Month').index[-7:]

# Update Interchange Fee for the last 7 rows
for i in range(1, len(last_7_indices)):
    current_index = last_7_indices[i]
    previous_index = last_7_indices[i - 1]
    
    # Calculate the Interchange Fee for last 7 rows based on revolving balance difference
    revolving_difference = customerB.loc[current_index, 'Revolving_Bal'] - customerB.loc[previous_index, 'Revolving_Bal'] - 30
    interchange_fee = revolving_difference / (1 + interest_rate_monthly) * interchange_fee_factor
    
    # Update the Interchange Fee for the current row
    customerB.at[current_index, 'Interchange Fee'] = max(interchange_fee,0)

customerB.loc[customerB['Delinquency'] > 3, 'Interchange Fee'] = 0


customerB['Late_Fee_Revenue'] = 0
customerB.loc[customerB['Delinquency'] > 0, 'Late_Fee_Revenue'] = 30
customerB.loc[customerB['Delinquency'] > 3, 'Late_Fee_Revenue'] = 0

customerB['Monthly_Interest_Revenue'] = customerB['Revolving_Bal'] * interest_rate_monthly
customerB.loc[customerB['Delinquency'] > 0, 'Monthly_Interest_Revenue'] = 0

customerB['Annual_Fee'] = np.where(
    customerB['Month on Book'] % 12 == 0,
    100,
    0
)

customerB['Total Revenue'] = customerB['Interchange Fee'] + customerB['Late_Fee_Revenue'] + customerB['Monthly_Interest_Revenue'] + customerB['Annual_Fee'] 

customerB.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,...,1,0,843.384521,32932.018556,0.583597,19.979706,0,17.296411,0,37.276117
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,...,1,0,889.614333,32242.92844,0.620058,21.074887,0,18.244507,0,39.319394
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,...,1,0,949.035188,31672.680033,0.63847,22.482562,0,19.46313,0,41.945692
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,...,1,0,900.011104,31004.38346,0.605092,21.321185,0,18.457728,0,39.778913
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,...,1,0,844.599156,30225.26299,0.573843,20.008481,0,17.321321,0,37.329802
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,...,1,0,793.284419,29343.513047,0.551622,18.792839,0,16.268941,0,35.061781
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,...,1,0,737.14325,28349.480765,0.536075,17.46286,0,15.117579,0,32.580439
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,...,1,0,886.225825,27653.613634,0.597154,20.994613,0,18.175015,0,39.169628
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,...,1,0,819.685038,26824.664928,0.575721,19.418268,0,16.810374,0,36.228642
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,...,1,1,958.602223,26273.550591,0.636783,22.709204,30,0.0,0,52.709204


In [99]:
weight_utilization = 0.4
delinquency_weights = {
    0: 0.05,
    1: 0.15,
    2: 0.4,
    3: 0.65,
    4: 0.72,
    5: 0.8,
    6: 0.84,
    7: 0.86
}
customerB['Delinquency_Weight'] = customerB['Delinquency'].map(delinquency_weights).fillna(0) 
weight_debt_income_ratio = 0.1

# Normalize each factor (optional) and calculate ECL
customerB['ECL Ratio'] = (
    weight_utilization * customerB['Utilization'] * 0.02 
    +
    weight_debt_income_ratio * customerB['Debt_to_Income_Ratio'] * 0.01
    +
    customerB['Delinquency_Weight']
)

customerB['ECL'] = customerB['ECL Ratio'] * customerB['Credit_Limit']  * customerB['Utilization'] 

customerB.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Total_Debt,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue,Delinquency_Weight,ECL Ratio,ECL
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,...,32932.018556,0.583597,19.979706,0,17.296411,0,37.276117,0.05,0.052182,52.129025
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,...,32242.92844,0.620058,21.074887,0,18.244507,0,39.319394,0.05,0.052306,55.117203
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,...,31672.680033,0.63847,22.482562,0,19.46313,0,41.945692,0.05,0.052437,58.945988
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,...,31004.38346,0.605092,21.321185,0,18.457728,0,39.778913,0.05,0.052311,55.766398
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,...,30225.26299,0.573843,20.008481,0,17.321321,0,37.329802,0.05,0.052175,52.196646
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,...,29343.513047,0.551622,18.792839,0,16.268941,0,35.061781,0.05,0.052055,48.913109
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,...,28349.480765,0.536075,17.46286,0,15.117579,0,32.580439,0.05,0.051933,45.345025
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,...,27653.613634,0.597154,20.994613,0,18.175015,0,39.169628,0.05,0.052277,54.876479
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,...,26824.664928,0.575721,19.418268,0,16.810374,0,36.228642,0.05,0.052129,50.612921
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,...,26273.550591,0.636783,22.709204,30,0.0,0,52.709204,0.15,0.152454,173.104902


In [101]:
# Calculate the month-over-month ECL charge
customerB['ECL MoM Charge'] = customerB['ECL'].diff()
customerB.loc[0, 'ECL MoM Charge'] = customerB.loc[0, 'ECL']  # Set the first value to the first ECL

# Calculate Profit and cumulative metrics
customerB['Profit'] = customerB['Total Revenue'] - customerB['ECL MoM Charge']
customerB['Cumulative Profit'] = customerB['Profit'].cumsum()

# Calculate the month-over-month change in Cumulative Profit
customerB['MoM Cumulative Profit Change'] = customerB['Cumulative Profit'].diff()
customerB.loc[0, 'MoM Cumulative Profit Change'] = customerB.loc[0, 'Cumulative Profit']  # Set the first value

In [102]:

customerB.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Monthly_Interest_Revenue,Annual_Fee,Total Revenue,Delinquency_Weight,ECL Ratio,ECL,ECL MoM Charge,Profit,Cumulative Profit,MoM Cumulative Profit Change
0,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-03-31,1,5000.0,...,17.296411,0,37.276117,0.05,0.052182,52.129025,52.129025,-14.852908,-14.852908,-14.852908
1,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-04-30,2,5000.0,...,18.244507,0,39.319394,0.05,0.052306,55.117203,2.988177,36.331216,21.478308,36.331216
2,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-05-31,3,5000.0,...,19.46313,0,41.945692,0.05,0.052437,58.945988,3.828785,38.116907,59.595215,38.116907
3,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-06-30,4,5000.0,...,18.457728,0,39.778913,0.05,0.052311,55.766398,-3.179589,42.958502,102.553717,42.958502
4,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-07-31,5,5000.0,...,17.321321,0,37.329802,0.05,0.052175,52.196646,-3.569753,40.899555,143.453272,40.899555
5,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-08-31,6,5000.0,...,16.268941,0,35.061781,0.05,0.052055,48.913109,-3.283537,38.345317,181.798589,38.345317
6,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-09-30,7,5000.0,...,15.117579,0,32.580439,0.05,0.051933,45.345025,-3.568084,36.148523,217.947112,36.148523
7,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-10-31,8,5000.0,...,18.175015,0,39.169628,0.05,0.052277,54.876479,9.531454,29.638174,247.585286,29.638174
8,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-11-30,9,5000.0,...,16.810374,0,36.228642,0.05,0.052129,50.612921,-4.263558,40.492199,288.077486,40.492199
9,100871,21,M,High School,Single / not married,$40k - $60k,46000,2016-12-31,10,5000.0,...,0.0,0,52.709204,0.15,0.152454,173.104902,122.491981,-69.782777,218.294709,-69.782777
