Import Dependencies

In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf

Load Data and Data Preprocessing

Since the dataset is already cleaned the data preprocessing is easier

In [61]:
data = pd.read_csv('/content/LoanDataRiskAnalysis.csv')

Separate the labels which is what we want out model to predict from the features which is the data we will feed to our model in order to make the predictions

In the features we have to drop the columns Loan approved and application data first the application date is not relevant and LoanApproved is what we want to approve

In [62]:
features = data.drop(columns=['LoanApproved', 'ApplicationDate'])

target = data['LoanApproved']

Encode the categorical values

In [63]:
features = pd.get_dummies(features)

In [64]:
features.head(3)

Unnamed: 0,Age,AnnualIncome,CreditScore,Experience,LoanAmount,LoanDuration,NumberOfDependents,MonthlyDebtPayments,CreditCardUtilizationRate,NumberOfOpenCreditLines,...,MaritalStatus_Widowed,HomeOwnershipStatus_Mortgage,HomeOwnershipStatus_Other,HomeOwnershipStatus_Own,HomeOwnershipStatus_Rent,LoanPurpose_Auto,LoanPurpose_Debt Consolidation,LoanPurpose_Education,LoanPurpose_Home,LoanPurpose_Other
0,45,236513,588,23,18746,72,4,294,0.082171,5,...,False,False,True,False,False,False,True,False,False,False
1,38,60500,543,16,21814,60,0,328,0.308603,3,...,False,True,False,False,False,False,False,True,False,False
2,47,30104,527,28,18811,60,3,214,0.250898,2,...,False,True,False,False,False,False,False,False,True,False


Scale the numerical features

In [65]:
scaler = StandardScaler()
features = scaler.fit_transform(features)

Split the data into test and train

75% of the data will be used for training the model

25% of the data will be used for testing the model predictions

In [66]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.25, random_state=42)

Build a simple neural network with keras and tensorflow

For the activation output layer I am using the sigmoid function because the problem is binary 1 the loan approved and 0 the loan not approved

In [67]:
# Build neural network
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Optimizer
1. Try the Adadelta and as a loss function establich the binary cross entropy
2. As a metric establish the accuracy

Weight Initialization: Use initializers like HeNormal for ReLU-based layers to improve convergence.

Regularization: Add dropout layers to prevent overfitting.

Learning Rate Scheduler: Dynamically adjust the learning rate during training to improve convergence.



In [68]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [69]:
from tensorflow.keras.optimizers import Adadelta

In [70]:
# Compile the model

# Set the Learning rate Scheduler
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)
optimizer = Adadelta(learning_rate=0.001)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

Train the model

In [71]:
model_training = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4898 - loss: 0.8888 - val_accuracy: 0.4864 - val_loss: 0.7498
Epoch 2/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5108 - loss: 0.8604 - val_accuracy: 0.5024 - val_loss: 0.7990
Epoch 3/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5133 - loss: 0.8570 - val_accuracy: 0.5104 - val_loss: 0.8214
Epoch 4/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5107 - loss: 0.8573 - val_accuracy: 0.5168 - val_loss: 0.8246
Epoch 5/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5234 - loss: 0.8336 - val_accuracy: 0.5184 - val_loss: 0.8257
Epoch 6/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5213 - loss: 0.8334 - val_accuracy: 0.5248 - val_loss: 0.8190
Epoch 7/20
[1m118/118[0m 

Evaluate the model

In [72]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5683 - loss: 0.7465 
Test Accuracy: 0.57


Create predictions

In [73]:
predictions = model.predict(X_test)

# Convert probabilities to binary labels
predicted_labels = (predictions > 0.5).astype(int)

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


These are the logits but we have to turn the logits to prediction probabilities and then to actual predictions

In [74]:
print(predicted_labels)

[[1]
 [1]
 [0]
 ...
 [0]
 [0]
 [0]]


Hyperparameter Tuning

In [76]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [77]:
import keras_tuner as kt

In [78]:
from tensorflow.keras.optimizers import Adam

In [79]:

def build_model(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(
        hp.Int('units', min_value=32, max_value=128, step=32),
        activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', [0.001, 0.0001])),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

tuner = kt.Hyperband(build_model, objective='val_accuracy', max_epochs=10, factor=3)
tuner.search(X_train, y_train, validation_data=(X_test, y_test), epochs=50)

Trial 8 Complete [00h 00m 03s]
val_accuracy: 0.8479999899864197

Best val_accuracy So Far: 0.9904000163078308
Total elapsed time: 00h 00m 23s
