### Libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt

### Reading & preprocessing data

In [3]:
df = pd.read_csv("df_cleaned.csv")
df.head()

Unnamed: 0,last_pymnt_d,total_rec_prncp,last_pymnt_amnt,out_prncp,total_rec_late_fee,last_fico_range_high,installment,loan_amnt,total_rec_int,out_prncp_inv,...,funded_amnt_inv,recoveries,debt_settlement_flag,hardship_flag,mo_sin_old_rev_tl_op,revol_util,dti,all_util,annual_inc,loan_status
0,17,-0.332581,-0.372797,0.42008,-0.261895,0.281611,-0.473027,-0.461473,0.413942,0.420298,...,-0.461004,-0.445598,0,0,-0.307356,0.608376,0.558953,0.91582,-1.203816,0
1,31,-0.84056,-0.473603,-0.62922,-0.261895,1.244049,-1.608631,-1.60151,-1.081823,-0.629167,...,-1.601271,-0.445598,0,0,-0.187187,-1.880835,-1.233491,-0.049124,0.274631,1
2,27,-0.25072,-0.222228,2.263937,-0.261895,0.883135,1.252073,1.058575,-0.380182,2.264445,...,1.059353,-0.445598,0,0,-0.980297,0.431193,-0.223328,0.37974,2.227297,1
3,39,-0.458023,0.161605,-0.62922,-0.261895,0.161306,-1.364264,-1.373503,-0.909433,-0.629167,...,-1.373218,-0.445598,0,0,-1.472987,0.314511,-0.337836,0.433348,-1.231711,1
4,5,-0.415519,0.220569,-0.62922,-0.261895,0.221459,-1.317547,-1.348168,-0.83444,-0.629167,...,-1.347878,-0.445598,0,0,0.738107,1.273894,0.117928,1.559116,-0.841178,1


In [4]:
X = df.drop(["loan_status", "total_rec_int", "last_pymnt_amnt", "last_fico_range_high",
             "all_util"], axis = 1)
y = df["loan_status"]

X

Unnamed: 0,last_pymnt_d,total_rec_prncp,out_prncp,total_rec_late_fee,installment,loan_amnt,out_prncp_inv,total_pymnt,funded_amnt_inv,recoveries,debt_settlement_flag,hardship_flag,mo_sin_old_rev_tl_op,revol_util,dti,annual_inc
0,17,-0.332581,0.420080,-0.261895,-0.473027,-0.461473,0.420298,-0.222676,-0.461004,-0.445598,0,0,-0.307356,0.608376,0.558953,-1.203816
1,31,-0.840560,-0.629220,-0.261895,-1.608631,-1.601510,-0.629167,-1.102958,-1.601271,-0.445598,0,0,-0.187187,-1.880835,-1.233491,0.274631
2,27,-0.250720,2.263937,-0.261895,1.252073,1.058575,2.264445,-0.393139,1.059353,-0.445598,0,0,-0.980297,0.431193,-0.223328,2.227297
3,39,-0.458023,-0.629220,-0.261895,-1.364264,-1.373503,-0.629167,-0.727889,-1.373218,-0.445598,0,0,-1.472987,0.314511,-0.337836,-1.231711
4,5,-0.415519,-0.629220,-0.261895,-1.317547,-1.348168,-0.629167,-0.669368,-1.347878,-0.445598,0,0,0.738107,1.273894,0.117928,-0.841178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119281,36,1.072123,-0.629220,-0.261895,-0.473196,-0.461473,-0.629167,0.659788,-0.461004,-0.445598,0,0,0.353569,0.720736,-0.931916,4.179963
119282,26,0.475587,-0.629220,-0.261895,1.079697,0.555059,-0.629167,0.784970,0.555735,-0.445598,0,0,-0.727944,0.405263,-0.997673,1.390440
119283,27,-0.983894,0.132695,-0.261895,-0.951061,-1.078993,0.132868,-1.184196,-1.078649,-0.445598,0,0,2.336344,-0.109001,0.109992,-1.426978
119284,13,0.639197,-0.629220,3.433568,0.636417,1.311917,-0.629167,1.330306,1.312745,-0.445598,1,0,-0.259288,-0.484975,-0.158705,0.001258


In [17]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=737)

### 2 Layer Neural Network Model

In [25]:
# Define the model
nn = Sequential()

# First hidden layer
nn.add(Dense(512,
                input_shape=(16, ),  # Replace input_dim with your input feature size
                kernel_regularizer=regularizers.l2(0.2)))  # lambda = 0.2 (L2 regularization)
nn.add(LeakyReLU(alpha=0.01))  # leaky relu activation, alpha ~ 0.01
nn.add(Dropout(0.5))  # prob = 0.5

# Second hidden layer
nn.add(Dense(512,
                kernel_regularizer=regularizers.l2(0.2)))
nn.add(LeakyReLU(alpha=0.01))
nn.add(Dropout(0.5))

# Output layer (for binary classification; change units and activation if needed)
nn.add(Dense(1, activation='sigmoid'))

# Optimizer - Mini-batch Gradient Descent
optimizer = SGD(learning_rate=0.01)

# Compile the model
nn.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
nn.fit(x_train, y_train,
          batch_size=64,  # batch = 64
          epochs=20,  # you define epochs separately
          validation_split=0.2)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.6915 - loss: 32.9403 - val_accuracy: 0.7842 - val_loss: 0.6510
Epoch 2/20
[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - accuracy: 0.7792 - loss: 0.6466 - val_accuracy: 0.8270 - val_loss: 0.6001
Epoch 3/20
[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 10ms/step - accuracy: 0.7736 - loss: 0.6300 - val_accuracy: 0.8000 - val_loss: 0.6126
Epoch 4/20
[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - accuracy: 0.7641 - loss: 0.6279 - val_accuracy: 0.8153 - val_loss: 0.5782
Epoch 5/20
[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 11ms/step - accuracy: 0.7594 - loss: 0.6258 - val_accuracy: 0.7073 - val_loss: 0.6767
Epoch 6/20
[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 11ms/step - accuracy: 0.7535 - loss: 0.6275 - val_accuracy: 0.7873 - val_loss: 0.5930
Epoch 7/20
[1

<keras.src.callbacks.history.History at 0x79e2885bde90>

In [26]:
# nn = 2_layer_neural_network_model(x_train, y_train)

In [30]:
y_pred_nn = nn.predict(x_test)
y_pred_nn = (y_pred_nn > 0.5).astype(int)

[1m746/746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


array([[1],
       [1],
       [0],
       ...,
       [1],
       [0],
       [0]])

### Performance Measures

In [31]:
accuracy_score(y_test, y_pred_nn)

0.8605918350238914

In [32]:
f1_score(y_test, y_pred_nn)

0.870915159512536

In [33]:
roc_auc_score(y_test, nn.predict_proba(x_test)[:, 1])

AttributeError: 'Sequential' object has no attribute 'predict_proba'

### Interpretability