In [1]:
import pandas as pd 
import numpy as np 
import os
from datetime import datetime, timedelta
import random

In [2]:
base_dir = os.path.dirname(os.path.abspath('data Customer C.ipynb'))
credit_case_study_folder = os.path.join(base_dir, 'Credit_EDA_case_study')
application_file_name = 'application_data.csv'

credit_case_study_data = pd.read_csv(os.path.join(credit_case_study_folder,application_file_name))
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
5,100008,0,Cash loans,M,N,Y,0,99000.0,490495.5,27517.5,...,0,0,0,0,0.0,0.0,0.0,0.0,1.0,1.0
6,100009,0,Cash loans,F,Y,Y,1,171000.0,1560726.0,41301.0,...,0,0,0,0,0.0,0.0,0.0,1.0,1.0,2.0
7,100010,0,Cash loans,M,Y,Y,0,360000.0,1530000.0,42075.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,100011,0,Cash loans,F,N,Y,0,112500.0,1019610.0,33826.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
9,100012,0,Revolving loans,M,N,Y,0,135000.0,405000.0,20250.0,...,0,0,0,0,,,,,,


In [3]:
credit_case_study_data = credit_case_study_data[[
    'SK_ID_CURR', 'DAYS_BIRTH', 'CODE_GENDER', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'AMT_INCOME_TOTAL'
]]
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,DAYS_BIRTH,CODE_GENDER,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,AMT_INCOME_TOTAL
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [4]:
rename_dict = {
    'SK_ID_CURR' : 'Customer ID', 
    'DAYS_BIRTH': 'Customer Age', 
    'CODE_GENDER': 'Gender', 
    'NAME_EDUCATION_TYPE': 'Education', 
    'NAME_FAMILY_STATUS': 'Marital Status', 
    'AMT_INCOME_TOTAL': 'Income Category'
}

credit_case_study_data = credit_case_study_data.rename(columns=rename_dict)
credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [5]:
credit_case_study_data['Customer Age'] = abs(credit_case_study_data['Customer Age']/365).astype(int)

credit_case_study_data['Income Numeric'] = credit_case_study_data['Income Category']
income_bins = [0, 40000, 60000, 80000, 100000, 120000, float('inf')]  
income_labels = ['Less than $40k', '$40k - $60k', '$60k to $80k', '$80k to $100k', '$100k to $120k', '$120k+']
credit_case_study_data['Income Category'] = pd.cut(credit_case_study_data['Income Category'], bins=income_bins, labels=income_labels, right=False)

education_mapping = {
    'Academic degree': 'College',
    'Higher education': 'Graduate',
    'Incomplete higher': 'College',
    'Lower secondary': 'Junior High School',
    'Secondary / secondary special': 'High School'
}
credit_case_study_data['Education'] = credit_case_study_data['Education'].map(education_mapping)

Marital_mapping = {
    'Civil marriage': 'Married',
    'Married': 'Married',
    'Seperated': 'Divorced',
    'Single / not married': 'Single',
    'Widow': 'Divorced',
    'Unknown': 'Unknown'
}
credit_case_study_data['Marital Status'] = credit_case_study_data['Marital Status'].map(Marital_mapping)


credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
0,100002,25,M,High School,Single,$120k+,202500.0
1,100003,45,F,Graduate,Married,$120k+,270000.0
2,100004,52,M,High School,Single,$60k to $80k,67500.0
3,100006,52,F,High School,Married,$120k+,135000.0
4,100007,54,M,High School,Single,$120k+,121500.0
5,100008,46,M,High School,Married,$80k to $100k,99000.0
6,100009,37,F,Graduate,Married,$120k+,171000.0
7,100010,51,M,Graduate,Married,$120k+,360000.0
8,100011,55,F,High School,Married,$100k to $120k,112500.0
9,100012,39,M,High School,Single,$120k+,135000.0


In [6]:
filtered_data = credit_case_study_data[
    (credit_case_study_data['Customer Age'] > 39) &
    (credit_case_study_data['Customer Age'] < 42) &
    (credit_case_study_data['Education'] == 'College') &
    (credit_case_study_data['Gender'] == 'M') &
    (credit_case_study_data['Marital Status'] == 'Single') &
    (credit_case_study_data['Income Category'] == '$120k+')
]

filtered_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
1453,101708,40,M,College,Single,$120k+,189000.0
24767,128804,41,M,College,Single,$120k+,342000.0
102345,218813,41,M,College,Single,$120k+,180000.0
108526,225886,40,M,College,Single,$120k+,202500.0
117320,236054,40,M,College,Single,$120k+,225000.0
119835,238962,40,M,College,Single,$120k+,270000.0
191204,321698,40,M,College,Single,$120k+,315000.0
197859,329404,40,M,College,Single,$120k+,180000.0
224732,360297,40,M,College,Single,$120k+,225000.0
238445,376151,41,M,College,Single,$120k+,202500.0


In [7]:
customerE = credit_case_study_data[
    (credit_case_study_data['Customer ID'] ==  101708) 
]

customerE.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
1453,101708,40,M,College,Single,$120k+,189000.0


In [8]:
customerE['Snapshot Month'] = pd.to_datetime('2020-12-31')
customerE['Month on Book'] = 1
customerE['Credit_Limit'] = (customerE['Income Numeric'] * 0.08).round(-3)

start_date = pd.to_datetime('2020-12-31')
end_date = pd.to_datetime('2024-10-31')
dates = pd.date_range(start=start_date, end=end_date, freq='M')

# Create the new DataFrame with time series data
time_series_customerE = []

# Iterate through the dates to generate the rows
for date in dates:
    # Copy the customer data for each row
    new_row = customerE.copy()
    
    # Update snapshot month to the current date
    new_row['Snapshot Month'] = date
    
    # Update Customer Age: +1 every year
    new_row['Customer Age'] = customerE['Customer Age'] + (date.year - start_date.year)
    
    # Update MoB: +1 for each row
    new_row['Month on Book'] = customerE['Month on Book'] + (date.year - start_date.year) * 12 + (date.month - start_date.month)
    
    # Append the new row to the list
    time_series_customerE.append(new_row)
    
# Concatenate all rows into a single DataFrame
customerE = pd.concat(time_series_customerE, ignore_index=True)

customerE.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerE['Snapshot Month'] = pd.to_datetime('2020-12-31')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerE['Month on Book'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerE['Credit_Limit'] = (customerE['Income Numeric'] * 0.08).round(-3)
  dates = pd.date_range(start=start_date

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit
0,101708,40,M,College,Single,$120k+,189000.0,2020-12-31,1,15000.0
1,101708,41,M,College,Single,$120k+,189000.0,2021-01-31,2,15000.0
2,101708,41,M,College,Single,$120k+,189000.0,2021-02-28,3,15000.0
3,101708,41,M,College,Single,$120k+,189000.0,2021-03-31,4,15000.0
4,101708,41,M,College,Single,$120k+,189000.0,2021-04-30,5,15000.0
5,101708,41,M,College,Single,$120k+,189000.0,2021-05-31,6,15000.0
6,101708,41,M,College,Single,$120k+,189000.0,2021-06-30,7,15000.0
7,101708,41,M,College,Single,$120k+,189000.0,2021-07-31,8,15000.0
8,101708,41,M,College,Single,$120k+,189000.0,2021-08-31,9,15000.0
9,101708,41,M,College,Single,$120k+,189000.0,2021-09-30,10,15000.0


In [9]:
initial_fico = 760
min_utilization = 0.28
max_utilization = 0.35
risk_start_date = pd.to_datetime('2024-1-31')  # Starting point for Delinquency
max_peak_utilization = 0.99  # Peak utilization during Delinquency period
fico_deterioration_rate = 8  # Deterioration in FICO score per 10% increase in utilization
fico_fluctuation_rate = 1
external_bank_credit_card_max_util_greater_than_50_base = 0
external_bank_credit_card_max_util_greater_than_90_base = 0
credit_inquiy_base = 1

customerE['Income Category'] = np.where(
    customerE['Snapshot Month'] < risk_start_date,
    '$120k+',
    'Less than $40k'
)
customerE['Income Numeric'] = np.where(
    customerE['Snapshot Month'] < risk_start_date,
    189000,
    0
)

# Function to adjust FICO based on utilization
def calculate_fico(utilization, fico_score):
    # FICO deteriorates as utilization increases
    if utilization > 0.20:
        fico_deduction = fico_deterioration_rate * ((utilization - 0.20) // 0.10)
        fico_score = max(fico_score - fico_deduction, 300)  # FICO score can't go below 300
    return fico_score

# Function to adjust FICO based on utilization
def fluctuate_fico(utilization, fico_score):
    fico_deduction = fico_fluctuation_rate * ((utilization - 0.20) // 0.025)
    fico_score = max(fico_score - fico_deduction, 300) 
    return fico_score

# Function to generate monthly utilization and FICO
def generate_utilization_fico(df):
    fico_scores = []
    utilizations = []
    external_bank_credit_card_max_util_greater_than_50 = []
    external_bank_credit_card_max_util_greater_than_90 = []
    credit_inquiries = []
    
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        MoB = row['Month on Book']
        
        if snapshot_date < risk_start_date:
            utilization = random.uniform(min_utilization, max_utilization)
            fico_score = fluctuate_fico(utilization, initial_fico)
            credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base
            credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base
            if MoB <= 12:
                credit_inquiry = credit_inquiy_base
            else:
                credit_inquiry = 0
        else:
            months_since_risk_start_date = (snapshot_date - risk_start_date).days // 30
            
            if months_since_risk_start_date <= 3:
                # First 4 months: 
                utilization =  0.20 + 0.2 * months_since_risk_start_date + random.uniform(0.01,0.02)  
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 2
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 1 
                credit_inquiry = credit_inquiy_base 

            else:
                # After reaching 90%, slow increase by 0.8% per month
                utilization = random.uniform(0.98,1)
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 2
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 2
                credit_inquiry = credit_inquiy_base 
                
            fico_score = calculate_fico(utilization, initial_fico)

        utilizations.append(utilization)
        fico_scores.append(fico_score)
        external_bank_credit_card_max_util_greater_than_50.append(credit_card_max_util_greater_than_50)
        external_bank_credit_card_max_util_greater_than_90.append(credit_card_max_util_greater_than_90)
        credit_inquiries.append(credit_inquiry)
    
    df['Utilization'] = utilizations
    df['FICO'] = fico_scores
    df['external_bank_credit_card_max_util_greater_than_50'] = external_bank_credit_card_max_util_greater_than_50
    df['external_bank_credit_card_max_util_greater_than_90'] = external_bank_credit_card_max_util_greater_than_90
    df['Credit_Inquiries'] = credit_inquiries
    return df

# Generate the utilization and FICO for the customerE DataFrame
customerE = generate_utilization_fico(customerE)

customerE.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,0.320402,756.0,0,0,1
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,0.33559,755.0,0,0,1
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,0.336946,755.0,0,0,1
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,0.283871,757.0,0,0,1
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,0.339113,755.0,0,0,1
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,0.304578,756.0,0,0,1
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,0.280835,757.0,0,0,1
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,0.284526,757.0,0,0,1
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,0.3116,756.0,0,0,1
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,0.285149,757.0,0,0,1


In [10]:
customerE['Delinquency'] = 0
customerE = customerE.sort_values(by='Snapshot Month').reset_index(drop=True)

In [11]:
Revolving_balance_base = 0

# Function to generate monthly revolving balance
def generate_revolving_balance(df):
    revolving_balance = []
    
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        credit_limit = row['Credit_Limit']
        utilization = row['Utilization']
        
        if snapshot_date < risk_start_date:
            revolving_bal = Revolving_balance_base
        else:
            months_since_risk_start_date = (snapshot_date - risk_start_date).days // 30
            
            if months_since_risk_start_date <= 3:
                # First 4 months: 
                revolving_bal = credit_limit * utilization * 0.9

            else:
                revolving_bal = credit_limit * utilization

        revolving_balance.append(revolving_bal)
    
    df['Revolving_Bal'] = revolving_balance

    return df

# Generate the utilization and FICO for the customerE DataFrame
customerE = generate_revolving_balance(customerE)

# revolving_balance_delinquency_3 = customerE.loc[customerE['Delinquency'] == 3, 'Revolving_Bal'].iloc[0]
# customerE.loc[customerE['Delinquency'] > 3, 'Revolving_Bal'] = revolving_balance_delinquency_3

customerE.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,0.320402,756.0,0,0,1,0,0.0
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,0.33559,755.0,0,0,1,0,0.0
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,0.336946,755.0,0,0,1,0,0.0
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,0.283871,757.0,0,0,1,0,0.0
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,0.339113,755.0,0,0,1,0,0.0
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,0.304578,756.0,0,0,1,0,0.0
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,0.280835,757.0,0,0,1,0,0.0
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,0.284526,757.0,0,0,1,0,0.0
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,0.3116,756.0,0,0,1,0,0.0
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,0.285149,757.0,0,0,1,0,0.0


In [12]:
initial_debt = random.uniform(38600, 38700)  
debt_reduction = random.uniform(900, 1000)  

# Function to generate monthly revolving balance
def generate_total_debt(df):
    total_debt = []
    dti = []
    i = 0

    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        Revolving_Balance = row['Revolving_Bal']
        Income = row['Income Numeric']
        card_max_util_gt_50 =  row['external_bank_credit_card_max_util_greater_than_50']
        
        if snapshot_date < risk_start_date:
            total_debt_debt = initial_debt - debt_reduction*i
            i = i+1
            debt_to_income = debt_reduction / (Income/12)
        else:
                total_debt_debt = initial_debt - debt_reduction*(i-1) + Revolving_Balance * card_max_util_gt_50
                debt_to_income = 1 + i*0.2
                i = i+1

        total_debt.append(total_debt_debt)
        dti.append(debt_to_income)
    
    df['Total_Debt'] = total_debt
    df['Debt_to_Income_Ratio'] = dti

    return df

# Generate the utilization and FICO for the customerE DataFrame
customerE = generate_total_debt(customerE)

customerE.head(100)

# Assign the computed Total_Debt values to the DataFrame
# customerE['Total_Debt'] = total_debt_values
# customerE['Debt_to_Income_Ratio'] = (
#     np.random.uniform(500, 600, size=len(customerE)) + 
#     np.minimum(customerE['external_bank_credit_card_max_util_greater_than_50'] + 1, 3) * customerE['Revolving_Bal']
# ) / (customerE['Income Numeric'] / 12)

# customerE.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,0.320402,756.0,0,0,1,0,0.0,38669.366819,0.059828
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,0.33559,755.0,0,0,1,0,0.0,37727.080625,0.059828
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,0.336946,755.0,0,0,1,0,0.0,36784.794431,0.059828
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,0.283871,757.0,0,0,1,0,0.0,35842.508237,0.059828
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,0.339113,755.0,0,0,1,0,0.0,34900.222044,0.059828
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,0.304578,756.0,0,0,1,0,0.0,33957.93585,0.059828
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,0.280835,757.0,0,0,1,0,0.0,33015.649656,0.059828
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,0.284526,757.0,0,0,1,0,0.0,32073.363462,0.059828
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,0.3116,756.0,0,0,1,0,0.0,31131.077269,0.059828
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,0.285149,757.0,0,0,1,0,0.0,30188.791075,0.059828


In [13]:
interchange_fee_factor = 0.02
interest_rate_monthly = 24.61 / 100 / 12

# Calculate Interchange Fee for all rows initially
customerE['Interchange Fee'] = customerE.apply(
    lambda row: row['Utilization'] * row['Credit_Limit'] * interchange_fee_factor, axis=1
)

customerE['Interchange Fee'] = np.where(
    customerE['Snapshot Month'] < risk_start_date,
    customerE['Utilization'] * customerE['Credit_Limit'] * interchange_fee_factor,
    customerE['Utilization'] * customerE['Credit_Limit'] * 0.05 * interchange_fee_factor
)

# # Get the last 7 rows based on Snapshot Month ascending order
# last_7_indices = customerE.sort_values(by='Snapshot Month').index[-7:]

# # Update Interchange Fee for the last 7 rows
# for i in range(1, len(last_7_indices)):
#     current_index = last_7_indices[i]
#     previous_index = last_7_indices[i - 1]
    
#     # Calculate the Interchange Fee for last 7 rows based on revolving balance difference
#     revolving_difference = customerE.loc[current_index, 'Revolving_Bal'] - customerE.loc[previous_index, 'Revolving_Bal'] - 30
#     interchange_fee = revolving_difference / (1 + interest_rate_monthly) * interchange_fee_factor
    
#     # Update the Interchange Fee for the current row
#     customerE.at[current_index, 'Interchange Fee'] = max(interchange_fee,0)

# customerE.loc[customerE['Delinquency'] > 3, 'Interchange Fee'] = 0


customerE['Late_Fee_Revenue'] = 0
# customerE.loc[customerE['Delinquency'] > 0, 'Late_Fee_Revenue'] = 30
# customerE.loc[customerE['Delinquency'] > 3, 'Late_Fee_Revenue'] = 0

customerE['Monthly_Interest_Revenue'] = customerE['Revolving_Bal'] * interest_rate_monthly
customerE.loc[customerE['Delinquency'] > 0, 'Monthly_Interest_Revenue'] = 0

customerE['Annual_Fee'] = np.where(
    customerE['Month on Book'] % 12 == 0,
    100,
    0
)

customerE['Total Revenue'] = customerE['Interchange Fee'] + customerE['Late_Fee_Revenue'] + customerE['Monthly_Interest_Revenue'] + customerE['Annual_Fee'] 

customerE.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,...,1,0,0.0,38669.366819,0.059828,96.120678,0,0.0,0,96.120678
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,...,1,0,0.0,37727.080625,0.059828,100.676969,0,0.0,0,100.676969
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,...,1,0,0.0,36784.794431,0.059828,101.083659,0,0.0,0,101.083659
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,...,1,0,0.0,35842.508237,0.059828,85.1613,0,0.0,0,85.1613
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,...,1,0,0.0,34900.222044,0.059828,101.733925,0,0.0,0,101.733925
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,...,1,0,0.0,33957.93585,0.059828,91.3733,0,0.0,0,91.3733
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,...,1,0,0.0,33015.649656,0.059828,84.250432,0,0.0,0,84.250432
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,...,1,0,0.0,32073.363462,0.059828,85.357738,0,0.0,0,85.357738
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,...,1,0,0.0,31131.077269,0.059828,93.479896,0,0.0,0,93.479896
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,...,1,0,0.0,30188.791075,0.059828,85.544663,0,0.0,0,85.544663


In [15]:
weight_utilization = 0.15

GT90_weights = {
    0: 0,
    1: 0.1,
    2: 0.15,
}

GT50_weights = {
    0: 0,
    1: 0.25,
    2: 0.25,
}

customerE['GT90_Weight'] = customerE['external_bank_credit_card_max_util_greater_than_90'].map(GT90_weights).fillna(0) 
customerE['GT50_Weight'] = customerE['external_bank_credit_card_max_util_greater_than_50'].map(GT50_weights).fillna(0)

# Normalize each factor (optional) and calculate ECL
customerE['ECL Ratio'] = customerE.apply(
    lambda x: min(
        0,05 + x['GT50_Weight'] * x['external_bank_credit_card_max_util_greater_than_50'] + x['GT90_Weight'] * x['external_bank_credit_card_max_util_greater_than_90'], 
        0.9
    ),
    axis=1
)

customerE['ECL'] = customerE['ECL Ratio'] * customerE['Credit_Limit']  * customerE['Utilization'] 

customerE.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue,GT90_Weight,GT50_Weight,ECL Ratio,ECL
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,...,0.059828,96.120678,0,0.0,0,96.120678,0.0,0.0,0.0,0.0
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,...,0.059828,100.676969,0,0.0,0,100.676969,0.0,0.0,0.0,0.0
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,...,0.059828,101.083659,0,0.0,0,101.083659,0.0,0.0,0.0,0.0
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,...,0.059828,85.1613,0,0.0,0,85.1613,0.0,0.0,0.0,0.0
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,...,0.059828,101.733925,0,0.0,0,101.733925,0.0,0.0,0.0,0.0
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,...,0.059828,91.3733,0,0.0,0,91.3733,0.0,0.0,0.0,0.0
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,...,0.059828,84.250432,0,0.0,0,84.250432,0.0,0.0,0.0,0.0
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,...,0.059828,85.357738,0,0.0,0,85.357738,0.0,0.0,0.0,0.0
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,...,0.059828,93.479896,0,0.0,0,93.479896,0.0,0.0,0.0,0.0
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,...,0.059828,85.544663,0,0.0,0,85.544663,0.0,0.0,0.0,0.0


In [16]:
# Calculate the month-over-month ECL charge
customerE['ECL MoM Charge'] = customerE['ECL'].diff()
customerE.loc[0, 'ECL MoM Charge'] = customerE.loc[0, 'ECL']  # Set the first value to the first ECL

# Calculate Profit and cumulative metrics
customerE['Profit'] = customerE['Total Revenue'] - customerE['ECL MoM Charge']
customerE['Cumulative Profit'] = customerE['Profit'].cumsum()

# Calculate the month-over-month change in Cumulative Profit
customerE['MoM Cumulative Profit Change'] = customerE['Cumulative Profit'].diff()
customerE.loc[0, 'MoM Cumulative Profit Change'] = customerE.loc[0, 'Cumulative Profit']  # Set the first value

In [17]:

customerE.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Annual_Fee,Total Revenue,GT90_Weight,GT50_Weight,ECL Ratio,ECL,ECL MoM Charge,Profit,Cumulative Profit,MoM Cumulative Profit Change
0,101708,40,M,College,Single,$120k+,189000,2020-12-31,1,15000.0,...,0,96.120678,0.0,0.0,0.0,0.0,0.0,96.120678,96.120678,96.120678
1,101708,41,M,College,Single,$120k+,189000,2021-01-31,2,15000.0,...,0,100.676969,0.0,0.0,0.0,0.0,0.0,100.676969,196.797647,100.676969
2,101708,41,M,College,Single,$120k+,189000,2021-02-28,3,15000.0,...,0,101.083659,0.0,0.0,0.0,0.0,0.0,101.083659,297.881307,101.083659
3,101708,41,M,College,Single,$120k+,189000,2021-03-31,4,15000.0,...,0,85.1613,0.0,0.0,0.0,0.0,0.0,85.1613,383.042607,85.1613
4,101708,41,M,College,Single,$120k+,189000,2021-04-30,5,15000.0,...,0,101.733925,0.0,0.0,0.0,0.0,0.0,101.733925,484.776532,101.733925
5,101708,41,M,College,Single,$120k+,189000,2021-05-31,6,15000.0,...,0,91.3733,0.0,0.0,0.0,0.0,0.0,91.3733,576.149832,91.3733
6,101708,41,M,College,Single,$120k+,189000,2021-06-30,7,15000.0,...,0,84.250432,0.0,0.0,0.0,0.0,0.0,84.250432,660.400264,84.250432
7,101708,41,M,College,Single,$120k+,189000,2021-07-31,8,15000.0,...,0,85.357738,0.0,0.0,0.0,0.0,0.0,85.357738,745.758002,85.357738
8,101708,41,M,College,Single,$120k+,189000,2021-08-31,9,15000.0,...,0,93.479896,0.0,0.0,0.0,0.0,0.0,93.479896,839.237898,93.479896
9,101708,41,M,College,Single,$120k+,189000,2021-09-30,10,15000.0,...,0,85.544663,0.0,0.0,0.0,0.0,0.0,85.544663,924.782561,85.544663


In [18]:
customerE.to_csv('customerE.csv')