In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Read the credit-scoring records from the CSV next to the notebook and
# print a plain-text preview of the first five rows.
data = pd.read_csv("credit_scoring.csv")
print(data.head(n=5))

   Age  Gender Marital Status Education Level Employment Status  \
0   60    Male        Married          Master          Employed   
1   25    Male        Married     High School        Unemployed   
2   30  Female         Single          Master          Employed   
3   58  Female        Married             PhD        Unemployed   
4   32    Male        Married        Bachelor     Self-Employed   

   Credit Utilization Ratio  Payment History  Number of Credit Accounts  \
0                      0.22           2685.0                          2   
1                      0.20           2371.0                          9   
2                      0.22           2771.0                          6   
3                      0.12           1371.0                          2   
4                      0.99            828.0                          2   

   Loan Amount  Interest Rate  Loan Term   Type of Loan  
0      4675000           2.65         48  Personal Loan  
1      3619000           5.19 

In [6]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   object 
 2   Marital Status             1000 non-null   object 
 3   Education Level            1000 non-null   object 
 4   Employment Status          1000 non-null   object 
 5   Credit Utilization Ratio   1000 non-null   float64
 6   Payment History            1000 non-null   float64
 7   Number of Credit Accounts  1000 non-null   int64  
 8   Loan Amount                1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   object 
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB
None


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
summary_stats = data.describe()
print(summary_stats)

               Age  Credit Utilization Ratio  Payment History  \
count  1000.000000               1000.000000      1000.000000   
mean     42.702000                  0.509950      1452.814000   
std      13.266771                  0.291057       827.934146   
min      20.000000                  0.000000         0.000000   
25%      31.000000                  0.250000       763.750000   
50%      42.000000                  0.530000      1428.000000   
75%      54.000000                  0.750000      2142.000000   
max      65.000000                  1.000000      2857.000000   

       Number of Credit Accounts   Loan Amount  Interest Rate    Loan Term  
count                1000.000000  1.000000e+03    1000.000000  1000.000000  
mean                    5.580000  2.471401e+06      10.686600    37.128000  
std                     2.933634  1.387047e+06       5.479058    17.436274  
min                     1.000000  1.080000e+05       1.010000    12.000000  
25%                     3.000

In [8]:
# Ordinal codes for the two categorical columns that enter the score formula.
# Series.map() turns any label missing from its dictionary into NaN, so this
# cell is meant to run once on the freshly loaded string columns.
education_level_mapping = {'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}
employment_status_mapping = {'Unemployed': 0, 'Employed': 1, 'Self-Employed': 2}
data['Education Level'] = data['Education Level'].map(education_level_mapping)
data['Employment Status'] = data['Employment Status'].map(employment_status_mapping)

# Weight of each column in the score, listed in the order the terms are added.
credit_score_weights = {
    'Payment History': 0.35,
    'Credit Utilization Ratio': 0.30,
    'Number of Credit Accounts': 0.15,
    'Education Level': 0.10,
    'Employment Status': 0.10,
}

# Whole-column arithmetic replaces the per-row iterrows() loop. The terms are
# accumulated in the same left-to-right order as before, so every row gets
# the same floating-point result.
data['Credit Score'] = sum(
    data[column] * weight for column, weight in credit_score_weights.items()
)

# Kept as a plain list under its original name for any later cell that uses it.
credit_scores = data['Credit Score'].tolist()

print(data.head())

   Age  Gender Marital Status  Education Level  Employment Status  \
0   60    Male        Married                3                  1   
1   25    Male        Married                1                  0   
2   30  Female         Single                3                  1   
3   58  Female        Married                4                  0   
4   32    Male        Married                2                  2   

   Credit Utilization Ratio  Payment History  Number of Credit Accounts  \
0                      0.22           2685.0                          2   
1                      0.20           2371.0                          9   
2                      0.22           2771.0                          6   
3                      0.12           1371.0                          2   
4                      0.99            828.0                          2   

   Loan Amount  Interest Rate  Loan Term   Type of Loan  Credit Score  
0      4675000           2.65         48  Personal Loan       

*MODEL*

In [9]:
# Rich display of the first five rows, now including the Credit Score column.
data.head(n=5)

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Credit Score
0,60,Male,Married,3,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan,940.516
1,25,Male,Married,1,0,0.2,2371.0,9,3619000,5.19,60,Auto Loan,831.36
2,30,Female,Single,3,1,0.22,2771.0,6,957000,2.76,12,Auto Loan,971.216
3,58,Female,Married,4,0,0.12,1371.0,2,4731000,6.57,60,Auto Loan,480.586
4,32,Male,Married,2,2,0.99,828.0,2,3289000,6.28,36,Personal Loan,290.797


In [10]:
# Integer codes for the remaining categorical columns.
gender_codes = {'Male': 1, 'Female': 0}
marital_status_codes = {'Married': 1, 'Single': 0}
loan_type_codes = {'Personal Loan': 1, 'Auto Loan': 2, 'Home Loan': 3}

# Marital statuses missing from the mapping come back from map() as NaN.
# They used to be filled with 1, which silently relabelled them as "Married";
# give them their own code so the model can tell the groups apart.
# NOTE(review): confirm which other statuses the CSV contains (e.g. divorced)
# and whether a dedicated code per status is preferable.
other_marital_status_code = 2

data['Gender'] = data['Gender'].map(gender_codes)
data['Marital Status'] = (
    data['Marital Status']
    .map(marital_status_codes)
    .fillna(other_marital_status_code)
)
data['Type of Loan'] = data['Type of Loan'].map(loan_type_codes)

In [11]:
# Convert the utilization ratio to a percentage with vectorized arithmetic
# instead of a Python-level list comprehension over the Series.
# Not idempotent: every re-run of this cell multiplies the column by 100 again.
data["Credit Utilization Ratio"] = data["Credit Utilization Ratio"] * 100


In [12]:
import keras
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras.optimizers import AdamW
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from keras.losses import Huber
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [13]:
# Columns fed to the network as inputs, in the order the model will see them.
feature_columns = [
    'Age',
    'Gender',
    'Marital Status',
    'Education Level',
    'Employment Status',
    'Credit Utilization Ratio',
    'Payment History',
    'Number of Credit Accounts',
    'Loan Amount',
    'Interest Rate',
    'Loan Term',
    'Type of Loan',
]
x = data[feature_columns]

# Regression target: the score computed from the weighted formula.
y = data['Credit Score']

In [14]:
# Hold out 20% of the rows for testing. A fixed random_state makes the
# shuffle, and therefore every downstream metric, reproducible across
# kernel restarts.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [15]:
# (units, dropout rate) for each hidden block, from widest to narrowest.
hidden_blocks = [(128, 0.4), (64, 0.3), (32, 0.2), (16, 0.1)]

# Declare the input with an explicit Input object. Passing `input_shape` to
# the first Dense layer makes Keras emit a UserWarning asking for this form.
model_layers = [keras.Input(shape=(xtrain.shape[1],))]

# Each hidden block is Dense(relu, L2-regularized) -> BatchNormalization -> Dropout.
for units, dropout_rate in hidden_blocks:
    model_layers += [
        Dense(units, activation='relu', kernel_regularizer=keras.regularizers.L2(0.01)),
        BatchNormalization(),
        Dropout(dropout_rate),
    ]

# Single linear unit: the network regresses the credit score directly.
model_layers.append(Dense(1, activation='linear'))

# NOTE(review): the features reach the network unscaled (Loan Amount is in
# the millions) although StandardScaler is imported; consider standardizing
# the inputs before training.
model = Sequential(model_layers)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Training setup: AdamW optimizer, Huber loss, mean absolute error as the
# reported metric.
huber_loss = keras.losses.Huber()
optimizer = AdamW(weight_decay=1e-4, learning_rate=0.001)
model.compile(
    loss=huber_loss,
    optimizer=optimizer,
    metrics=['mae'],
)
model.summary()

In [17]:
# Stop once val_loss has not improved for 20 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-6.
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)

callbacks = [early_stopping, lr_on_plateau]

In [18]:
# Train for at most 200 epochs in mini-batches of 16; the callbacks decide
# when to lower the learning rate or stop early.
# NOTE(review): the test split doubles as the validation set here, so early
# stopping and LR scheduling are tuned on the data later used for testing.
# Consider carving a separate validation split out of the training data.
history = model.fit(
    xtrain,
    ytrain,
    validation_data=(xtest, ytest),
    epochs=200,
    batch_size=16,
    callbacks=callbacks,
)

Epoch 1/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 525.1424 - mae: 524.0705 - val_loss: 476.8301 - val_mae: 476.0678 - learning_rate: 0.0010
Epoch 2/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 513.5942 - mae: 512.8981 - val_loss: 462.9958 - val_mae: 462.4559 - learning_rate: 0.0010
Epoch 3/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 520.0596 - mae: 519.5562 - val_loss: 462.1780 - val_mae: 461.7558 - learning_rate: 0.0010
Epoch 4/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 515.8840 - mae: 515.4846 - val_loss: 461.6349 - val_mae: 461.2867 - learning_rate: 0.0010
Epoch 5/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 512.5844 - mae: 512.2507 - val_loss: 461.0364 - val_mae: 460.7391 - learning_rate: 0.0010
Epoch 6/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

In [19]:
# Persist the trained model to disk in the notebook's working directory.
model.save(filepath="credit_score_model.h5")



In [20]:
# Sanity check on the feature frame: column dtypes and non-null counts.
x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   int64  
 2   Marital Status             1000 non-null   float64
 3   Education Level            1000 non-null   int64  
 4   Employment Status          1000 non-null   int64  
 5   Credit Utilization Ratio   1000 non-null   float64
 6   Payment History            1000 non-null   float64
 7   Number of Credit Accounts  1000 non-null   int64  
 8   Loan Amount                1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   int64  
dtypes: float64(4), int64(8)
memory usage: 93.9 KB


In [21]:
# Rich display of the first five rows after all categorical encodings.
data.head(n=5)

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Credit Score
0,60,1,1.0,3,1,22.0,2685.0,2,4675000,2.65,48,1,940.516
1,25,1,1.0,1,0,20.0,2371.0,9,3619000,5.19,60,2,831.36
2,30,0,0.0,3,1,22.0,2771.0,6,957000,2.76,12,2,971.216
3,58,0,1.0,4,0,12.0,1371.0,2,4731000,6.57,60,2,480.586
4,32,1,1.0,2,2,99.0,828.0,2,3289000,6.28,36,1,290.797
