In [1]:
import pandas as pd 
import numpy as np 
import os
from datetime import datetime, timedelta
import random

In [2]:
base_dir = os.path.dirname(os.path.abspath('data Customer C.ipynb'))
credit_case_study_folder = os.path.join(base_dir, 'Credit_EDA_case_study')
application_file_name = 'application_data.csv'

credit_case_study_data = pd.read_csv(os.path.join(credit_case_study_folder,application_file_name))
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
5,100008,0,Cash loans,M,N,Y,0,99000.0,490495.5,27517.5,...,0,0,0,0,0.0,0.0,0.0,0.0,1.0,1.0
6,100009,0,Cash loans,F,Y,Y,1,171000.0,1560726.0,41301.0,...,0,0,0,0,0.0,0.0,0.0,1.0,1.0,2.0
7,100010,0,Cash loans,M,Y,Y,0,360000.0,1530000.0,42075.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,100011,0,Cash loans,F,N,Y,0,112500.0,1019610.0,33826.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
9,100012,0,Revolving loans,M,N,Y,0,135000.0,405000.0,20250.0,...,0,0,0,0,,,,,,


In [3]:
credit_case_study_data = credit_case_study_data[[
    'SK_ID_CURR', 'DAYS_BIRTH', 'CODE_GENDER', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'AMT_INCOME_TOTAL'
]]
credit_case_study_data.head(10)

Unnamed: 0,SK_ID_CURR,DAYS_BIRTH,CODE_GENDER,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,AMT_INCOME_TOTAL
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [4]:
rename_dict = {
    'SK_ID_CURR' : 'Customer ID', 
    'DAYS_BIRTH': 'Customer Age', 
    'CODE_GENDER': 'Gender', 
    'NAME_EDUCATION_TYPE': 'Education', 
    'NAME_FAMILY_STATUS': 'Marital Status', 
    'AMT_INCOME_TOTAL': 'Income Category'
}

credit_case_study_data = credit_case_study_data.rename(columns=rename_dict)
credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category
0,100002,-9461,M,Secondary / secondary special,Single / not married,202500.0
1,100003,-16765,F,Higher education,Married,270000.0
2,100004,-19046,M,Secondary / secondary special,Single / not married,67500.0
3,100006,-19005,F,Secondary / secondary special,Civil marriage,135000.0
4,100007,-19932,M,Secondary / secondary special,Single / not married,121500.0
5,100008,-16941,M,Secondary / secondary special,Married,99000.0
6,100009,-13778,F,Higher education,Married,171000.0
7,100010,-18850,M,Higher education,Married,360000.0
8,100011,-20099,F,Secondary / secondary special,Married,112500.0
9,100012,-14469,M,Secondary / secondary special,Single / not married,135000.0


In [5]:
credit_case_study_data['Customer Age'] = abs(credit_case_study_data['Customer Age']/365).astype(int)

credit_case_study_data['Income Numeric'] = credit_case_study_data['Income Category']
income_bins = [0, 40000, 60000, 80000, 100000, 120000, float('inf')]  
income_labels = ['Less than $40k', '$40k - $60k', '$60k to $80k', '$80k to $100k', '$100k to $120k', '$120k+']
credit_case_study_data['Income Category'] = pd.cut(credit_case_study_data['Income Category'], bins=income_bins, labels=income_labels, right=False)

education_mapping = {
    'Academic degree': 'College',
    'Higher education': 'Graduate',
    'Incomplete higher': 'College',
    'Lower secondary': 'Junior High School',
    'Secondary / secondary special': 'High School'
}
credit_case_study_data['Education'] = credit_case_study_data['Education'].map(education_mapping)

Marital_mapping = {
    'Civil marriage': 'Married',
    'Married': 'Married',
    'Seperated': 'Divorced',
    'Single / not married': 'Single',
    'Widow': 'Divorced',
    'Unknown': 'Unknown'
}
credit_case_study_data['Marital Status'] = credit_case_study_data['Marital Status'].map(Marital_mapping)


credit_case_study_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
0,100002,25,M,High School,Single,$120k+,202500.0
1,100003,45,F,Graduate,Married,$120k+,270000.0
2,100004,52,M,High School,Single,$60k to $80k,67500.0
3,100006,52,F,High School,Married,$120k+,135000.0
4,100007,54,M,High School,Single,$120k+,121500.0
5,100008,46,M,High School,Married,$80k to $100k,99000.0
6,100009,37,F,Graduate,Married,$120k+,171000.0
7,100010,51,M,Graduate,Married,$120k+,360000.0
8,100011,55,F,High School,Married,$100k to $120k,112500.0
9,100012,39,M,High School,Single,$120k+,135000.0


In [6]:
filtered_data = credit_case_study_data[
    (credit_case_study_data['Customer Age'] < 34) &
    (credit_case_study_data['Customer Age'] > 30) &
    (credit_case_study_data['Education'] == 'College') &
    (credit_case_study_data['Gender'] == 'M') &
    (credit_case_study_data['Marital Status'] == 'Single') &
    (credit_case_study_data['Income Category'] == '$100k to $120k')
]

filtered_data.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
4601,105382,32,M,College,Single,$100k to $120k,112500.0
69739,180904,31,M,College,Single,$100k to $120k,112500.0
109532,227033,32,M,College,Single,$100k to $120k,112500.0
133175,254457,31,M,College,Single,$100k to $120k,112500.0
137033,258933,32,M,College,Single,$100k to $120k,112500.0
150170,274088,32,M,College,Single,$100k to $120k,112500.0
156340,281225,33,M,College,Single,$100k to $120k,112500.0
166653,293199,33,M,College,Single,$100k to $120k,112500.0


In [7]:
customerC = credit_case_study_data[
    (credit_case_study_data['Customer ID'] ==  105382) 
]

customerC.head(10)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric
4601,105382,32,M,College,Single,$100k to $120k,112500.0


In [8]:
customerC['Snapshot Month'] = pd.to_datetime('2017-7-31')
customerC['Month on Book'] = 1
customerC['Credit_Limit'] = (customerC['Income Numeric'] * 0.12).round(-3)

start_date = pd.to_datetime('2017-7-31')
end_date = pd.to_datetime('2021-12-31')
dates = pd.date_range(start=start_date, end=end_date, freq='M')

# Create the new DataFrame with time series data
time_series_customerC = []

# Iterate through the dates to generate the rows
for date in dates:
    # Copy the customer data for each row
    new_row = customerC.copy()
    
    # Update snapshot month to the current date
    new_row['Snapshot Month'] = date
    
    # Update Customer Age: +1 every year
    new_row['Customer Age'] = customerC['Customer Age'] + (date.year - start_date.year)
    
    # Update MoB: +1 for each row
    new_row['Month on Book'] = customerC['Month on Book'] + (date.year - start_date.year) * 12 + (date.month - start_date.month)
    
    # Append the new row to the list
    time_series_customerC.append(new_row)
    
# Concatenate all rows into a single DataFrame
customerC = pd.concat(time_series_customerC, ignore_index=True)

customerC.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerC['Snapshot Month'] = pd.to_datetime('2017-7-31')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerC['Month on Book'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customerC['Credit_Limit'] = (customerC['Income Numeric'] * 0.12).round(-3)
  dates = pd.date_range(start=start_date,

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0


In [9]:
initial_fico = 750
min_utilization = 0.13
max_utilization = 0.20
risk_start_date = pd.to_datetime('2019-11-30')  # Starting point for Delinquency
max_peak_utilization = 0.99  # Peak utilization during Delinquency period
fico_deterioration_rate = 5  # Deterioration in FICO score per 10% increase in utilization
fico_fluctuation_rate = 1
external_bank_credit_card_max_util_greater_than_50_base = 0
external_bank_credit_card_max_util_greater_than_90_base = 0
credit_inquiy_base = 0


# Function to adjust FICO based on utilization
def calculate_fico(utilization, fico_score):
    # FICO deteriorates as utilization increases
    if utilization > 0.20:
        fico_deduction = fico_deterioration_rate * ((utilization - 0.20) // 0.10)
        fico_score = max(fico_score - fico_deduction, 300)  # FICO score can't go below 300
    return fico_score

# Function to adjust FICO based on utilization
def fluctuate_fico(utilization, fico_score):
    fico_deduction = fico_fluctuation_rate * ((utilization - 0.20) // 0.025)
    fico_score = max(fico_score - fico_deduction, 300) 
    return fico_score

# Function to generate monthly utilization and FICO
def generate_utilization_fico(df):
    fico_scores = []
    utilizations = []
    external_bank_credit_card_max_util_greater_than_50 = []
    external_bank_credit_card_max_util_greater_than_90 = []
    credit_inquiries = []
    
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        MoB = row['Month on Book']
        
        if snapshot_date < risk_start_date:
            utilization = random.uniform(min_utilization, max_utilization)
            fico_score = fluctuate_fico(utilization, initial_fico)
            credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base
            credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base
            if MoB <= 12:
                credit_inquiry = credit_inquiy_base
            else:
                credit_inquiry = 0
        else:
            months_since_risk_start_date = (snapshot_date - risk_start_date).days // 30
            
            if months_since_risk_start_date <= 3:
                # First 4 months: 
                utilization = random.uniform(min_utilization, max_utilization)
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base 
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base 
                credit_inquiry = credit_inquiy_base + 3

            elif 4 <= months_since_risk_start_date <= 11:
                # Months 5-11: 2% increase
                utilization =  0.20 + 0.02 * months_since_risk_start_date + random.uniform(0.01,0.02)  
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 1
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base 
                credit_inquiry = credit_inquiy_base + 3

            elif 12 <= months_since_risk_start_date <= 14:
                # Months 12-14: larger increase (more than 5% increase)
                utilization = min(0.36 + 0.05 * (months_since_risk_start_date - 11) + random.uniform(0.01,0.02), 0.55 + random.uniform(0.01,0.02))  #min(0.45 + 0.10 * (months_since_risk_start_date - 11) + random.uniform(0.01,0.02), 0.79 + random.uniform(0.01,0.02) )
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 2
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 1
                credit_inquiry = credit_inquiy_base + 4

            elif 14 <= months_since_risk_start_date <= 19:
                # Months 14-19: larger increase (more than 7% increase)
                utilization = min(0.55 + 0.07 * (months_since_risk_start_date - 14) + random.uniform(0.01,0.02), 0.95 + random.uniform(0.01,0.02))  #min(0.45 + 0.10 * (months_since_risk_start_date - 11) + random.uniform(0.01,0.02), 0.79 + random.uniform(0.01,0.02) )
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 3
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 2
                credit_inquiry = credit_inquiy_base + 4

            else:
                # After reaching 90%, slow increase by 0.8% per month
                utilization = min(0.95 + 0.008 * (months_since_risk_start_date - 19) + random.uniform(0.005,0.009), max_peak_utilization)
                credit_card_max_util_greater_than_50 = external_bank_credit_card_max_util_greater_than_50_base + 4
                credit_card_max_util_greater_than_90 = external_bank_credit_card_max_util_greater_than_90_base + 4
                credit_inquiry = credit_inquiy_base + 4
                
            fico_score = calculate_fico(utilization, initial_fico)

        utilizations.append(utilization)
        fico_scores.append(fico_score)
        external_bank_credit_card_max_util_greater_than_50.append(credit_card_max_util_greater_than_50)
        external_bank_credit_card_max_util_greater_than_90.append(credit_card_max_util_greater_than_90)
        credit_inquiries.append(credit_inquiry)
    
    df['Utilization'] = utilizations
    df['FICO'] = fico_scores
    df['external_bank_credit_card_max_util_greater_than_50'] = external_bank_credit_card_max_util_greater_than_50
    df['external_bank_credit_card_max_util_greater_than_90'] = external_bank_credit_card_max_util_greater_than_90
    df['Credit_Inquiries'] = credit_inquiries
    return df

# Generate the utilization and FICO for the customerC DataFrame
customerC = generate_utilization_fico(customerC)

customerC.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,0.198003,751.0,0,0,0
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,0.171242,752.0,0,0,0
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,0.181813,751.0,0,0,0
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,0.148353,753.0,0,0,0
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,0.181531,751.0,0,0,0
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,0.163008,752.0,0,0,0
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,0.18114,751.0,0,0,0
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,0.182205,751.0,0,0,0
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,0.171825,752.0,0,0,0
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,0.163462,752.0,0,0,0


In [10]:
customerC = customerC.sort_values(by='Month on Book', ascending=False)

customerC['Delinquency'] = 0
max_Delinquency = 7
for i in range(max_Delinquency + 1):
    if i < len(customerC):
        customerC.iloc[i, customerC.columns.get_loc('Delinquency')] = max_Delinquency - i
        customerC.iloc[i, customerC.columns.get_loc('FICO')] = customerC.iloc[i, customerC.columns.get_loc('FICO')] - (max_Delinquency - i)*20

# pre_risk_data = customerC[(customerC['Snapshot Month'] < risk_start_date) & (customerC['Month on Book']>=3)]
# if not pre_risk_data.empty:
#     highest_utilization_index = pre_risk_data['Utilization'].idxmax()
#     # Set the Delinquency of the highest utilization row to 1
#     customerC.at[highest_utilization_index, 'Delinquency'] = 1

customerC = customerC.sort_values(by='Snapshot Month').reset_index(drop=True)

In [11]:
Revolving_balance_base = 0


# Function to generate monthly revolving balance
def generate_revolving_balance(df):
    revolving_balance = []
    
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        credit_limit = row['Credit_Limit']
        utilization = row['Utilization']
        
        if snapshot_date < risk_start_date:
            revolving_bal = Revolving_balance_base
        else:
            months_since_risk_start_date = (snapshot_date - risk_start_date).days // 30
            
            if months_since_risk_start_date <= 3:
                # First 4 months: 
                revolving_bal = Revolving_balance_base

            elif 4 <= months_since_risk_start_date <= 19:
                revolving_bal = 0.85 * credit_limit * utilization

            else:
                revolving_bal = credit_limit * utilization

        revolving_balance.append(revolving_bal)
    
    df['Revolving_Bal'] = revolving_balance

    return df

# Generate the utilization and FICO for the customerC DataFrame
customerC = generate_revolving_balance(customerC)

revolving_balance_delinquency_3 = customerC.loc[customerC['Delinquency'] == 3, 'Revolving_Bal'].iloc[0]
customerC.loc[customerC['Delinquency'] > 3, 'Revolving_Bal'] = revolving_balance_delinquency_3

customerC.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,0.198003,751.0,0,0,0,0,0.0
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,0.171242,752.0,0,0,0,0,0.0
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,0.181813,751.0,0,0,0,0,0.0
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,0.148353,753.0,0,0,0,0,0.0
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,0.181531,751.0,0,0,0,0,0.0
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,0.163008,752.0,0,0,0,0,0.0
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,0.18114,751.0,0,0,0,0,0.0
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,0.182205,751.0,0,0,0,0,0.0
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,0.171825,752.0,0,0,0,0,0.0
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,0.163462,752.0,0,0,0,0,0.0


In [12]:
initial_debt = random.uniform(12500, 13000)  
debt_reduction = random.uniform(300, 350)  
new_debt = random.uniform(300000,350000)
new_debt_reduction = random.uniform(2000,2500)

# Function to generate monthly revolving balance
def generate_total_debt(df):
    total_debt = []
    dti = []
    i = 0
    j = 0
    for index, row in df.iterrows():
        snapshot_date = row['Snapshot Month']
        Revolving_Balance = row['Revolving_Bal']
        Income = row['Income Numeric']
        card_max_util_gt_50 =  row['external_bank_credit_card_max_util_greater_than_50']
        
        if snapshot_date < risk_start_date:
            total_debt_debt = initial_debt - debt_reduction*i
            i = i+1
            debt_to_income = debt_reduction / (Income/12)
        else:
            months_since_risk_start_date = (snapshot_date - risk_start_date).days // 30
            
            if months_since_risk_start_date <= 3:
                # First 4 months: 
                total_debt_debt = initial_debt - debt_reduction*(i-1) + new_debt - new_debt_reduction * j
                j = j + 1

                debt_to_income = (debt_reduction + new_debt_reduction) / (Income/12)

            else:
                total_debt_debt = initial_debt - debt_reduction*(i-1) + new_debt - new_debt_reduction * (j-1) - random.uniform(0,200) + (Revolving_Balance-Revolving_Balance_last_row)

                debt_to_income = (debt_reduction + new_debt_reduction  + Revolving_Balance*0.1*max(card_max_util_gt_50,2)) / (Income/12)
        
        Revolving_Balance_last_row = row['Revolving_Bal']

        total_debt.append(total_debt_debt)
        dti.append(debt_to_income)
    
    df['Total_Debt'] = total_debt
    df['Debt_to_Income_Ratio'] = dti

    return df

# Generate the utilization and FICO for the customerC DataFrame
customerC = generate_total_debt(customerC)

customerC.head(100)

# Assign the computed Total_Debt values to the DataFrame
# customerC['Total_Debt'] = total_debt_values
# customerC['Debt_to_Income_Ratio'] = (
#     np.random.uniform(500, 600, size=len(customerC)) + 
#     np.minimum(customerC['external_bank_credit_card_max_util_greater_than_50'] + 1, 3) * customerC['Revolving_Bal']
# ) / (customerC['Income Numeric'] / 12)

# customerC.head(50)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,Utilization,FICO,external_bank_credit_card_max_util_greater_than_50,external_bank_credit_card_max_util_greater_than_90,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,0.198003,751.0,0,0,0,0,0.0,12816.655741,0.03531
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,0.171242,752.0,0,0,0,0,0.0,12485.626182,0.03531
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,0.181813,751.0,0,0,0,0,0.0,12154.596624,0.03531
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,0.148353,753.0,0,0,0,0,0.0,11823.567065,0.03531
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,0.181531,751.0,0,0,0,0,0.0,11492.537507,0.03531
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,0.163008,752.0,0,0,0,0,0.0,11161.507948,0.03531
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,0.18114,751.0,0,0,0,0,0.0,10830.478389,0.03531
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,0.182205,751.0,0,0,0,0,0.0,10499.448831,0.03531
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,0.171825,752.0,0,0,0,0,0.0,10168.419272,0.03531
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,0.163462,752.0,0,0,0,0,0.0,9837.389714,0.03531


In [13]:
interchange_fee_factor = 0.02
interest_rate_monthly = 24.61 / 100 / 12

# Calculate Interchange Fee for all rows initially
customerC['Interchange Fee'] = customerC.apply(
    lambda row: row['Utilization'] * row['Credit_Limit'] * interchange_fee_factor, axis=1
)

# Get the last 7 rows based on Snapshot Month ascending order
last_7_indices = customerC.sort_values(by='Snapshot Month').index[-7:]

# Update Interchange Fee for the last 7 rows
for i in range(1, len(last_7_indices)):
    current_index = last_7_indices[i]
    previous_index = last_7_indices[i - 1]
    
    # Calculate the Interchange Fee for last 7 rows based on revolving balance difference
    revolving_difference = customerC.loc[current_index, 'Revolving_Bal'] - customerC.loc[previous_index, 'Revolving_Bal'] - 30
    interchange_fee = revolving_difference / (1 + interest_rate_monthly) * interchange_fee_factor
    
    # Update the Interchange Fee for the current row
    customerC.at[current_index, 'Interchange Fee'] = max(interchange_fee,0)

customerC.loc[customerC['Delinquency'] > 3, 'Interchange Fee'] = 0


customerC['Late_Fee_Revenue'] = 0
customerC.loc[customerC['Delinquency'] > 0, 'Late_Fee_Revenue'] = 30
customerC.loc[customerC['Delinquency'] > 3, 'Late_Fee_Revenue'] = 0

customerC['Monthly_Interest_Revenue'] = customerC['Revolving_Bal'] * interest_rate_monthly
customerC.loc[customerC['Delinquency'] > 0, 'Monthly_Interest_Revenue'] = 0

customerC['Annual_Fee'] = np.where(
    customerC['Month on Book'] % 12 == 0,
    100,
    0
)

customerC['Total Revenue'] = customerC['Interchange Fee'] + customerC['Late_Fee_Revenue'] + customerC['Monthly_Interest_Revenue'] + customerC['Annual_Fee'] 

customerC.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Credit_Inquiries,Delinquency,Revolving_Bal,Total_Debt,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,...,0,0,0.0,12816.655741,0.03531,55.440829,0,0.0,0,55.440829
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,...,0,0,0.0,12485.626182,0.03531,47.947632,0,0.0,0,47.947632
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,...,0,0,0.0,12154.596624,0.03531,50.907549,0,0.0,0,50.907549
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,...,0,0,0.0,11823.567065,0.03531,41.538821,0,0.0,0,41.538821
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,...,0,0,0.0,11492.537507,0.03531,50.828724,0,0.0,0,50.828724
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,...,0,0,0.0,11161.507948,0.03531,45.642182,0,0.0,0,45.642182
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,...,0,0,0.0,10830.478389,0.03531,50.719286,0,0.0,0,50.719286
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,...,0,0,0.0,10499.448831,0.03531,51.017528,0,0.0,0,51.017528
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,...,0,0,0.0,10168.419272,0.03531,48.110954,0,0.0,0,48.110954
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,...,0,0,0.0,9837.389714,0.03531,45.769236,0,0.0,0,45.769236


In [14]:
weight_utilization = 0.15
delinquency_weights = {
    0: 0.05,
    1: 0.15,
    2: 0.4,
    3: 0.65,
    4: 0.72,
    5: 0.8,
    6: 0.84,
    7: 0.86
}
customerC['Delinquency_Weight'] = customerC['Delinquency'].map(delinquency_weights).fillna(0) 
# weight_debt_income_ratio = 0.5

# Normalize each factor (optional) and calculate ECL
customerC['ECL Ratio'] = customerC.apply(
    lambda x: min(
        weight_utilization * x['external_bank_credit_card_max_util_greater_than_50'] + x['Delinquency_Weight'], 
        0.9
    ),
    axis=1
)

customerC['ECL'] = customerC['ECL Ratio'] * customerC['Credit_Limit']  * customerC['Utilization'] 

customerC.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Total_Debt,Debt_to_Income_Ratio,Interchange Fee,Late_Fee_Revenue,Monthly_Interest_Revenue,Annual_Fee,Total Revenue,Delinquency_Weight,ECL Ratio,ECL
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,...,12816.655741,0.03531,55.440829,0,0.0,0,55.440829,0.05,0.05,138.602074
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,...,12485.626182,0.03531,47.947632,0,0.0,0,47.947632,0.05,0.05,119.86908
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,...,12154.596624,0.03531,50.907549,0,0.0,0,50.907549,0.05,0.05,127.268872
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,...,11823.567065,0.03531,41.538821,0,0.0,0,41.538821,0.05,0.05,103.847053
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,...,11492.537507,0.03531,50.828724,0,0.0,0,50.828724,0.05,0.05,127.07181
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,...,11161.507948,0.03531,45.642182,0,0.0,0,45.642182,0.05,0.05,114.105454
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,...,10830.478389,0.03531,50.719286,0,0.0,0,50.719286,0.05,0.05,126.798216
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,...,10499.448831,0.03531,51.017528,0,0.0,0,51.017528,0.05,0.05,127.54382
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,...,10168.419272,0.03531,48.110954,0,0.0,0,48.110954,0.05,0.05,120.277386
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,...,9837.389714,0.03531,45.769236,0,0.0,0,45.769236,0.05,0.05,114.42309


In [15]:
# Calculate the month-over-month ECL charge
customerC['ECL MoM Charge'] = customerC['ECL'].diff()
customerC.loc[0, 'ECL MoM Charge'] = customerC.loc[0, 'ECL']  # Set the first value to the first ECL

# Calculate Profit and cumulative metrics
customerC['Profit'] = customerC['Total Revenue'] - customerC['ECL MoM Charge']
customerC['Cumulative Profit'] = customerC['Profit'].cumsum()

# Calculate the month-over-month change in Cumulative Profit
customerC['MoM Cumulative Profit Change'] = customerC['Cumulative Profit'].diff()
customerC.loc[0, 'MoM Cumulative Profit Change'] = customerC.loc[0, 'Cumulative Profit']  # Set the first value

In [16]:

customerC.head(100)

Unnamed: 0,Customer ID,Customer Age,Gender,Education,Marital Status,Income Category,Income Numeric,Snapshot Month,Month on Book,Credit_Limit,...,Monthly_Interest_Revenue,Annual_Fee,Total Revenue,Delinquency_Weight,ECL Ratio,ECL,ECL MoM Charge,Profit,Cumulative Profit,MoM Cumulative Profit Change
0,105382,32,M,College,Single,$100k to $120k,112500.0,2017-07-31,1,14000.0,...,0.0,0,55.440829,0.05,0.05,138.602074,138.602074,-83.161244,-83.161244,-83.161244
1,105382,32,M,College,Single,$100k to $120k,112500.0,2017-08-31,2,14000.0,...,0.0,0,47.947632,0.05,0.05,119.86908,-18.732994,66.680626,-16.480618,66.680626
2,105382,32,M,College,Single,$100k to $120k,112500.0,2017-09-30,3,14000.0,...,0.0,0,50.907549,0.05,0.05,127.268872,7.399792,43.507756,27.027138,43.507756
3,105382,32,M,College,Single,$100k to $120k,112500.0,2017-10-31,4,14000.0,...,0.0,0,41.538821,0.05,0.05,103.847053,-23.421819,64.96064,91.987778,64.96064
4,105382,32,M,College,Single,$100k to $120k,112500.0,2017-11-30,5,14000.0,...,0.0,0,50.828724,0.05,0.05,127.07181,23.224757,27.603967,119.591745,27.603967
5,105382,32,M,College,Single,$100k to $120k,112500.0,2017-12-31,6,14000.0,...,0.0,0,45.642182,0.05,0.05,114.105454,-12.966356,58.608538,178.200283,58.608538
6,105382,33,M,College,Single,$100k to $120k,112500.0,2018-01-31,7,14000.0,...,0.0,0,50.719286,0.05,0.05,126.798216,12.692762,38.026524,216.226808,38.026524
7,105382,33,M,College,Single,$100k to $120k,112500.0,2018-02-28,8,14000.0,...,0.0,0,51.017528,0.05,0.05,127.54382,0.745604,50.271924,266.498731,50.271924
8,105382,33,M,College,Single,$100k to $120k,112500.0,2018-03-31,9,14000.0,...,0.0,0,48.110954,0.05,0.05,120.277386,-7.266434,55.377388,321.876119,55.377388
9,105382,33,M,College,Single,$100k to $120k,112500.0,2018-04-30,10,14000.0,...,0.0,0,45.769236,0.05,0.05,114.42309,-5.854296,51.623532,373.499651,51.623532


In [17]:
customerC.to_csv('customerC.csv')