In [56]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
from sklearn.cluster import KMeans
%matplotlib inline

In [57]:
# Load the credit scoring dataset; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv("credit_scoring.csv")

In [58]:
# Display the loaded frame for a quick visual check (pandas truncates long frames).
df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.20,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,Divorced,High School,Employed,0.74,1285.0,8,3530000,12.99,48,Auto Loan
996,64,Male,Divorced,Bachelor,Unemployed,0.77,1857.0,2,1377000,18.02,60,Home Loan
997,63,Female,Single,Master,Self-Employed,0.18,2628.0,10,2443000,18.95,12,Personal Loan
998,51,Female,Married,PhD,Self-Employed,0.32,1142.0,3,1301000,1.80,24,Auto Loan


In [59]:
# Number of missing values in each column.
df.isna().sum()

Age                          0
Gender                       0
Marital Status               0
Education Level              0
Employment Status            0
Credit Utilization Ratio     0
Payment History              0
Number of Credit Accounts    0
Loan Amount                  0
Interest Rate                0
Loan Term                    0
Type of Loan                 0
dtype: int64

In [60]:
# Count rows that exactly repeat an earlier row.
df.duplicated().sum()

0

In [61]:
# Print column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   object 
 2   Marital Status             1000 non-null   object 
 3   Education Level            1000 non-null   object 
 4   Employment Status          1000 non-null   object 
 5   Credit Utilization Ratio   1000 non-null   float64
 6   Payment History            1000 non-null   float64
 7   Number of Credit Accounts  1000 non-null   int64  
 8   Loan Amount                1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   object 
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [62]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Age,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,42.702,0.50995,1452.814,5.58,2471401.0,10.6866,37.128
std,13.266771,0.291057,827.934146,2.933634,1387047.0,5.479058,17.436274
min,20.0,0.0,0.0,1.0,108000.0,1.01,12.0
25%,31.0,0.25,763.75,3.0,1298000.0,6.0225,24.0
50%,42.0,0.53,1428.0,6.0,2437500.0,10.705,36.0
75%,54.0,0.75,2142.0,8.0,3653250.0,15.44,48.0
max,65.0,1.0,2857.0,10.0,4996000.0,19.99,60.0


In [63]:
# Box plot showing the spread of the credit utilization ratio across customers.
credit_utilization_ratio = px.box(
    df,
    y="Credit Utilization Ratio",
    title="Credit Utilization Ratio",
)
credit_utilization_ratio.show()

In [64]:
# Histogram of loan amounts, asking Plotly for at most 20 bins.
loan_amount = px.histogram(
    df,
    x="Loan Amount",
    nbins=20,
    title="Loan Amount",
)
loan_amount.show()

In [65]:
# Columns included in the pairwise correlation heatmap.
numeric_features = [
    'Age',
    'Credit Utilization Ratio',
    'Payment History',
    'Number of Credit Accounts',
    'Loan Amount',
    'Interest Rate',
]

# Render the correlation matrix as an annotated heatmap (text_auto prints each
# coefficient inside its cell).
correlation_matrix = px.imshow(
    df[numeric_features].corr(),
    text_auto=True,
    title="Correlation heatmap",
)
correlation_matrix.show()

In [66]:
# FICO Score Method
# Define the mapping for categorical features.
# NOTE(review): these codes are later multiplied directly into the credit score,
# so 'Divorced' (2) and 'Self-Employed' (2) receive the largest contribution for
# their feature - confirm this ordering is intended.
education_level_mapping = {
    'High School': 1,
    'Bachelor': 2,
    'Master': 3,
    'PhD': 4
}

marital_status_mapping = {
    'Married': 1,
    'Single': 0,
    'Divorced': 2
}

employment_status_mapping = {
    'Employed': 1,
    'Unemployed': 0,
    'Self-Employed': 2
}


def _encode_categories(column, mapping):
    """Replace the category labels in ``column`` with their numeric codes.

    Parameters
    ----------
    column : pandas.Series
        Column holding either the original labels (keys of ``mapping``) or
        codes produced by a previous call (values of ``mapping``).
    mapping : dict
        Label -> integer code.

    Returns
    -------
    pandas.Series
        ``column`` unchanged when it is already fully encoded, otherwise a new
        int64 series of codes.

    Raises
    ------
    ValueError
        If ``column`` contains a value that is not a key of ``mapping``.
    """
    # Re-running the cell would otherwise map the integer codes a second time
    # and silently turn the whole column into NaN.
    if column.isin(list(mapping.values())).all():
        return column

    encoded = column.map(mapping)
    unmapped = column[encoded.isna()].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Cannot encode column {column.name!r}: unexpected values "
            f"{sorted(map(str, unmapped))}"
        )
    return encoded.astype('int64')


# Apply mapping to categorical features
df['Education Level'] = _encode_categories(df['Education Level'], education_level_mapping)
df['Employment Status'] = _encode_categories(df['Employment Status'], employment_status_mapping)
df['Marital Status'] = _encode_categories(df['Marital Status'], marital_status_mapping)

In [67]:
# Display the frame to inspect the numerically coded categorical columns.
df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,1,3,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,1,1,0,0.20,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,0,3,1,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,1,4,0,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,1,2,2,0.99,828.0,2,3289000,6.28,36,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,2,1,1,0.74,1285.0,8,3530000,12.99,48,Auto Loan
996,64,Male,2,2,0,0.77,1857.0,2,1377000,18.02,60,Home Loan
997,63,Female,0,3,2,0.18,2628.0,10,2443000,18.95,12,Personal Loan
998,51,Female,1,4,2,0.32,1142.0,3,1301000,1.80,24,Auto Loan


In [69]:
# FICO Score Method
# calculate credit score
def _compute_credit_scores(frame):
    """Return the weighted credit score for every row of ``frame``.

    The score is the sum of six weighted components (the weights total 1.0):

    * 35% - 'Payment History', used as-is
    * 30% - (1 - 'Credit Utilization Ratio') * 300, so lower utilization scores higher
    * 10% - 'Number of Credit Accounts' * 10
    *  5% - 'Marital Status' code * 50
    * 10% - 'Education Level' code * 100
    * 10% - 'Employment Status' code * 100

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the six columns above, with the three categorical columns
        already converted to numeric codes.

    Returns
    -------
    pandas.Series
        One score per row, indexed like ``frame``.
    """
    # Column arithmetic replaces the row-by-row iterrows loop; the terms are
    # added in the same order as before so the results are unchanged.
    return (
        (frame['Payment History'] * 0.35)
        + ((1 - frame['Credit Utilization Ratio']) * 300 * 0.30)
        + (frame['Number of Credit Accounts'] * 10 * 0.10)
        + (frame['Marital Status'] * 50 * 0.05)
        + (frame['Education Level'] * 100 * 0.10)
        + (frame['Employment Status'] * 100 * 0.10)
    )


# NOTE(review): 'Payment History' is not rescaled, so scores are not confined
# to the usual 300-850 FICO range (values above 1000 occur in this dataset).
# NOTE(review): the marital/employment terms scale with the numeric code, so
# the category coded 2 contributes the most - confirm that matches the intent.
credit_scores = _compute_credit_scores(df)

df['Credit Score'] = credit_scores
df


Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Credit Score
0,60,Male,1,3,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan,1054.45
1,25,Male,1,1,0,0.20,2371.0,9,3619000,5.19,60,Auto Loan,923.35
2,30,Female,0,3,1,0.22,2771.0,6,957000,2.76,12,Auto Loan,1086.05
3,58,Female,1,4,0,0.12,1371.0,2,4731000,6.57,60,Auto Loan,603.55
4,32,Male,1,2,2,0.99,828.0,2,3289000,6.28,36,Personal Loan,335.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,2,1,1,0.74,1285.0,8,3530000,12.99,48,Auto Loan,506.15
996,64,Male,2,2,0,0.77,1857.0,2,1377000,18.02,60,Home Loan,697.65
997,63,Female,0,3,2,0.18,2628.0,10,2443000,18.95,12,Personal Loan,1053.60
998,51,Female,1,4,2,0.32,1142.0,3,1301000,1.80,24,Auto Loan,526.40


In [72]:
# Segmentation Based on Credit Score
# Cluster customers into four groups using the credit score as the only feature.
# random_state is fixed so repeated runs produce the same clustering.
X = df[['Credit Score']]
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X)

# The cluster ids are arbitrary identifiers: their numeric order says nothing
# about which cluster has the higher scores.
df['Segment'] = kmeans.labels_

In [73]:
# Store the cluster ids as a categorical column so each id is plotted as its
# own discrete colour group.
df['Segment'] = df['Segment'].astype('category')

# Scatter every customer's score against its row position, coloured by cluster id.
fig = px.scatter(
    df,
    x=df.index,
    y='Credit Score',
    color='Segment',
    color_discrete_sequence=['green', 'blue', 'yellow', 'red'],
)
fig.update_layout(
    title='Customer Segmentation Based on Credit Score',
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
)
fig.show()

In [74]:
# Name the clusters by where their centroid falls on the credit-score axis,
# instead of hard-coding which KMeans label id means what: label ids are
# arbitrary and can change with a different seed, dataset or library version.
segment_names = ['Very Low', 'Low', 'Good', 'Excellent']  # lowest -> highest score
if len(segment_names) != kmeans.n_clusters:
    raise ValueError(
        f"Expected {len(segment_names)} clusters to name, "
        f"but the fitted model has {kmeans.n_clusters}"
    )

# argsort lists the cluster ids from the lowest to the highest centroid.
clusters_by_score = np.argsort(kmeans.cluster_centers_.ravel())
segment_labels = {
    int(cluster_id): name
    for cluster_id, name in zip(clusters_by_score, segment_names)
}

# Map from the fitted labels rather than from the existing 'Segment' column so
# that re-running this cell does not turn the already-named values into NaN.
df['Segment'] = pd.Series(kmeans.labels_, index=df.index).map(segment_labels)

# Convert the 'Segment' column to category data type
df['Segment'] = df['Segment'].astype('category')

# Visualize the segments using Plotly
fig = px.scatter(df, x=df.index, y='Credit Score', color='Segment',
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])
fig.update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores'
)
fig.show()