In [1]:
# Import our dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


In [2]:
# Import and read the loan_data_cleaned.csv.
loan_data_df = pd.read_csv("./Resources/loan_data_cleaned.csv")  
loan_data_df.head()


Unnamed: 0.1,Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,0,1,2,Graduate,No,96000,299000,12,778,24000,176000,227000,80000,Approved
1,1,2,0,Not Graduate,Yes,41000,122000,8,417,27000,22000,88000,33000,Rejected
2,2,3,3,Graduate,No,91000,297000,20,506,71000,45000,333000,128000,Rejected
3,3,4,3,Graduate,No,82000,307000,8,467,182000,33000,233000,79000,Rejected
4,4,5,5,Not Graduate,Yes,98000,242000,20,382,124000,82000,294000,50000,Rejected


In [3]:
# Drop loan_id
loan_data_df = loan_data_df.drop("loan_id", axis=1)

# Handle missing values (if any). 
loan_data_df.fillna(loan_data_df.mean(), inplace=True)


  loan_data_df.fillna(loan_data_df.mean(), inplace=True)


In [4]:
loan_data_df.head()

Unnamed: 0.1,Unnamed: 0,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,0,2,Graduate,No,96000,299000,12,778,24000,176000,227000,80000,Approved
1,1,0,Not Graduate,Yes,41000,122000,8,417,27000,22000,88000,33000,Rejected
2,2,3,Graduate,No,91000,297000,20,506,71000,45000,333000,128000,Rejected
3,3,3,Graduate,No,82000,307000,8,467,182000,33000,233000,79000,Rejected
4,4,5,Not Graduate,Yes,98000,242000,20,382,124000,82000,294000,50000,Rejected


In [5]:
unique_counts = loan_data_df.nunique()
print(unique_counts)

Unnamed: 0                   4269
 no_of_dependents               6
 education                      2
 self_employed                  2
 income_annum                  98
 loan_amount                  378
 loan_term                     10
 cibil_score                  601
 residential_assets_value     278
 commercial_assets_value      188
 luxury_assets_value          379
 bank_asset_value             146
 loan_status                    2
dtype: int64


In [6]:
# Check the distribution
print(loan_data_df[' cibil_score'].describe())


count    4269.000000
mean      599.936051
std       172.430401
min       300.000000
25%       453.000000
50%       600.000000
75%       748.000000
max       900.000000
Name:  cibil_score, dtype: float64


In [7]:
# Outlier Removal: For numerical column cibil_score
loan_data_df[' cibil_score'] = np.log(loan_data_df[' cibil_score'] + 1)

In [8]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df = pd.get_dummies(loan_data_df)
print(loan_data_df)

      Unnamed: 0   no_of_dependents      education  self_employed  \
0              0                  2       Graduate             No   
1              1                  0   Not Graduate            Yes   
2              2                  3       Graduate             No   
3              3                  3       Graduate             No   
4              4                  5   Not Graduate            Yes   
...          ...                ...            ...            ...   
4264        4264                  5       Graduate            Yes   
4265        4265                  0   Not Graduate            Yes   
4266        4266                  2   Not Graduate             No   
4267        4267                  1   Not Graduate             No   
4268        4268                  1       Graduate             No   

       income_annum   loan_amount   loan_term   cibil_score  \
0             96000        299000          12      6.658011   
1             41000        122000           8

In [9]:
label_encoders = {}
for column in [' education', ' self_employed', ' loan_status']:
    le = LabelEncoder()
    loan_data_df[column] = le.fit_transform(loan_data_df[column])
    label_encoders[column] = le

In [10]:
# Splitting the data
X = loan_data_df.drop(" loan_status", axis=1)
y = loan_data_df[" loan_status"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Standardize the numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer with L2 regularization
nn.add(tf.keras.layers.Dense(units=number_input_features*2, activation="relu", kernel_regularizer=l2(0.01), input_dim=number_input_features))
nn.add(tf.keras.layers.Dropout(0.2))  # 20% dropout

# Second hidden layer with L2 regularization
nn.add(tf.keras.layers.Dense(units=number_input_features, activation="relu", kernel_regularizer=l2(0.01)))
nn.add(tf.keras.layers.Dropout(0.2))  # 20% dropout

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                312       
                                                                 
 dropout (Dropout)           (None, 24)                0         
                                                                 
 dense_1 (Dense)             (None, 12)                300       
                                                                 
 dropout_1 (Dropout)         (None, 12)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 13        
                                                                 
Total params: 625 (2.44 KB)
Trainable params: 625 (2.44 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
# Create callback for early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Define the optimizer
optimizer = Adam(learning_rate=0.001)

# Compile the model
nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [13]:
# Train the model
history = nn.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
    verbose=1,
    callbacks=[early_stopping]
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [14]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

34/34 - 0s - loss: 0.1477 - accuracy: 0.9644 - 44ms/epoch - 1ms/step
Loss: 0.14767877757549286, Accuracy: 0.9644194841384888


In [15]:
# Save the trained model
#nn.save("loan_approval_model.h5")
#print("Model saved to loan_approval_model.h5")

In [16]:
# Define an optimized deep neural network model
number_input_features = len(X_train_scaled[0])

optimized_nn = tf.keras.models.Sequential()

# First hidden layer with L2 regularization and swish activation
optimized_nn.add(tf.keras.layers.Dense(units=number_input_features*3, activation="swish", kernel_regularizer=l2(0.01), input_dim=number_input_features))
optimized_nn.add(tf.keras.layers.Dropout(0.3))  # 30% dropout

# Second hidden layer with L2 regularization and swish activation
optimized_nn.add(tf.keras.layers.Dense(units=number_input_features*2, activation="swish", kernel_regularizer=l2(0.01)))
optimized_nn.add(tf.keras.layers.Dropout(0.3))  # 30% dropout

# Third hidden layer
optimized_nn.add(tf.keras.layers.Dense(units=number_input_features, activation="relu"))

# Output layer
optimized_nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#display
optimized_nn.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 36)                468       
                                                                 
 dropout_2 (Dropout)         (None, 36)                0         
                                                                 
 dense_4 (Dense)             (None, 24)                888       
                                                                 
 dropout_3 (Dropout)         (None, 24)                0         
                                                                 
 dense_5 (Dense)             (None, 12)                300       
                                                                 
 dense_6 (Dense)             (None, 1)                 13        
                                                                 
Total params: 1669 (6.52 KB)
Trainable params: 1669 (6

In [17]:
# Create callback for early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Define the optimizer with a learning rate decay
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)
optimizer = Adam(learning_rate=lr_schedule)

# Compile the model
optimized_nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [18]:
# Train the model
history = optimized_nn.fit(
    X_train_scaled,
    y_train,
    epochs=150,
    batch_size=64,
    validation_data=(X_test_scaled, y_test),
    verbose=1,
    callbacks=[early_stopping]
)


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150


In [19]:

# Evaluate the model using the test data
model_loss, model_accuracy = optimized_nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

34/34 - 0s - loss: 0.1977 - accuracy: 0.9373 - 44ms/epoch - 1ms/step
Loss: 0.19768062233924866, Accuracy: 0.937265932559967


In [20]:
# Define the model
number_input_features = len(X_train_scaled[0])

advanced_nn = tf.keras.models.Sequential()

# Input and first hidden layer with LeakyReLU
advanced_nn.add(Dense(units=number_input_features*3, input_dim=number_input_features))
advanced_nn.add(LeakyReLU(alpha=0.01))

# Second hidden layer with LeakyReLU
advanced_nn.add(Dense(units=number_input_features*2))
advanced_nn.add(LeakyReLU(alpha=0.01))

# Third hidden layer with tanh
advanced_nn.add(Dense(units=number_input_features, activation='tanh'))

# Fourth hidden layer with tanh
advanced_nn.add(Dense(units=number_input_features//2, activation='tanh'))

# Output layer
advanced_nn.add(Dense(units=1, activation="sigmoid"))

#Display
advanced_nn.summary()



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 36)                468       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 36)                0         
                                                                 
 dense_8 (Dense)             (None, 24)                888       
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 24)                0         
                                                                 
 dense_9 (Dense)             (None, 12)                300       
                                                                 
 dense_10 (Dense)            (None, 6)                 78        
                                                                 
 dense_11 (Dense)            (None, 1)                

In [21]:
# Compile the model with an adaptive learning rate
optimizer = Adam(learning_rate=0.001)
advanced_nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [22]:
# Train the model
num_epochs = 150
history = advanced_nn.fit(
    X_train_scaled,
    y_train,
    epochs=num_epochs,
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
    verbose=1
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [23]:
# Evaluate the model using the test data
model_loss, model_accuracy = advanced_nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

34/34 - 0s - loss: 0.1769 - accuracy: 0.9560 - 41ms/epoch - 1ms/step
Loss: 0.1769091933965683, Accuracy: 0.9559925198554993


In [24]:
import pandas as pd

# Capture the model summaries as strings
from io import StringIO
import sys

def get_model_summary(model):
    stream = StringIO()
    sys.stdout = stream
    model.summary()
    sys.stdout = sys.__stdout__
    return stream.getvalue()

# Get model summaries
nn_summary = get_model_summary(nn)
optimized_nn_summary = get_model_summary(optimized_nn)
advanced_nn_summary = get_model_summary(advanced_nn)

# Data for each model
data = {
    'Model': ['nn', 'optimized_nn', 'advanced_nn'],
    'Summary': [nn_summary, optimized_nn_summary, advanced_nn_summary],
    'Loss': [0.1478002965450287, 1.0808980464935303, 0.11039558053016663],
    'Accuracy': [0.9700374603271484, 0.6385768055915833, 0.9691011309623718]
}

# Convert the data to a DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('Deep_model_comparison.csv', index=False)


In [25]:
import plotly.graph_objects as go

# Data for each model
models = ['nn', 'optimized_nn', 'advanced_nn']
accuracies = [97.00, 63.85, 96.91]  
losses = [14.78, 1.08, 11.03]  

# Create traces for accuracies and losses
trace1 = go.Bar(
    x=models,
    y=accuracies,
    name='Accuracy',
    marker=dict(color=['blue', 'green', 'red']),
    hoverinfo='y',
    text=accuracies,
    textposition='auto',
    hovertemplate=
    '<b>Model</b>: %{x}<br><b>Accuracy</b>: %{y:.2f}%<extra></extra>'  # Display value as percentage
)

trace2 = go.Bar(
    x=models,
    y=losses,
    name='Loss',
    marker=dict(color=['lightblue', 'lightgreen', 'lightcoral']),
    hoverinfo='y',
    text=losses,
    textposition='auto',
    hovertemplate=
    '<b>Model</b>: %{x}<br><b>Loss</b>: %{y:.2f}%<extra></extra>'  # Display value as percentage
)

# Create the layout and add traces
layout = go.Layout(
    title='Deep Learning Model Accuracy and Loss Comparison metrics',
    barmode='group'
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show plot
fig.show()

