<a href="https://colab.research.google.com/github/Samenergy/Formative-1-Databases/blob/main/Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE

In [4]:
# Read the loan dataset from a CSV file located in the working directory
csv_path = 'loan_data.csv'
data = pd.read_csv(csv_path)

# Preview the first five rows as a quick sanity check of the columns
data.head(n=5)

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [5]:
# Preprocessing the data: integer-encode every column listed below.
# One LabelEncoder is kept per column so that each fitted mapping
# (encoder.classes_) stays available for decoding later; re-fitting a single
# shared encoder would overwrite the mapping on every loop iteration.
categorical_columns = ['person_gender', 'person_education', 'person_home_ownership', 'loan_intent', 'previous_loan_defaults_on_file', 'loan_status']
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Backward-compatible alias: as before, `label_encoder` ends up being the
# encoder fitted on the last column of the list ('loan_status').
label_encoder = label_encoders[categorical_columns[-1]]

In [6]:
# The target is the loan_status column; every other column is a feature
target_column = 'loan_status'
y = data[target_column]
X = data.drop(columns=[target_column])


# Oversample with SMOTE (fixed seed for reproducibility) to balance the classes.
# NOTE(review): this resamples the full dataset, i.e. before any train/test
# split, so synthetic rows can be derived from rows that later end up in a
# test split -- confirm this is intended before evaluating on such a split.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X, y)
X_resampled, y_resampled = resampled

In [7]:
# Splitting the ORIGINAL data into training and testing sets first, so the
# test set contains only real, untouched samples. Splitting after SMOTE would
# let synthetic points interpolated from test rows leak into the training set.
# stratify=y keeps the class ratio identical in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# Oversampling the training split only (same seed as the rest of the notebook)
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)


# Normalizing the features. The per-column maxima are computed on the training
# split and re-used for the test split, so both are on the same scale and no
# test-set statistic is used. A zero maximum is replaced by 1 to avoid a
# division by zero. Test values may slightly exceed 1 if a test row is larger
# than anything seen in training.
feature_max = X_train.max().replace(0, 1)
X_train = X_train / feature_max
X_test = X_test / feature_max


# Defining, compiling, and training the model

In [8]:
# Function to define, compile, and train the model
def define_model(optimizer='adam', regularization=None, early_stopping=False, dropout=0.5, learning_rate=0.001,
                 epochs=30, batch_size=32, patience=5):
    """Build, compile and fit a small fully-connected binary classifier.

    The network is Dense(64, relu) -> Dropout -> Dense(32, relu) -> Dropout
    -> Dense(1, sigmoid), compiled with binary cross-entropy and accuracy.

    Note: despite its name this function also trains the model. It reads the
    notebook-level variables ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``; the test split is passed to ``fit`` as validation data, so it
    is also what early stopping monitors.

    Parameters
    ----------
    optimizer : str
        'adam' or 'rmsprop'. Any other value falls back to Adam.
    regularization : float or None
        L2 factor applied to the kernels of both hidden layers; None
        disables regularization.
    early_stopping : bool
        If True, stop when ``val_loss`` has not improved for ``patience``
        epochs and restore the best weights.
    dropout : float
        Dropout rate used after each hidden layer.
    learning_rate : float
        Learning rate passed to the optimizer.
    epochs : int
        Maximum number of training epochs (default 30, as before).
    batch_size : int
        Mini-batch size (default 32, as before).
    patience : int
        Early-stopping patience in epochs (default 5, as before); only used
        when ``early_stopping`` is True.

    Returns
    -------
    (model, history)
        The fitted model and the object returned by ``model.fit``.
    """
    reg = None if regularization is None else regularizers.l2(regularization)

    # An explicit Input object replaces the deprecated `input_dim` argument
    # on the first Dense layer.
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        # First hidden layer
        layers.Dense(64, activation='relu', kernel_regularizer=reg),
        layers.Dropout(dropout),
        # Second hidden layer
        layers.Dense(32, activation='relu', kernel_regularizer=reg),
        layers.Dropout(dropout),
        # Output layer: one sigmoid unit for binary classification
        layers.Dense(1, activation='sigmoid'),
    ])

    # Choosing optimizer: anything other than 'rmsprop' uses Adam, which
    # preserves the original fallback for unrecognised values.
    optimizer_cls = RMSprop if optimizer == 'rmsprop' else Adam
    model.compile(
        optimizer=optimizer_cls(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Callbacks
    callbacks = []
    if early_stopping:
        callbacks.append(
            EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        )

    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=callbacks,
    )
    return model, history


# Function to plot loss curves


In [9]:
# Function to plot training and validation loss curves
def loss_curve_plot(history):
    """Plot per-epoch training and validation loss on a single figure.

    Parameters
    ----------
    history
        Object exposing a ``history`` mapping with 'loss' and 'val_loss'
        entries, e.g. the second value returned by ``define_model``.
    """
    losses = history.history
    fig, ax = plt.subplots(figsize=(12, 6))

    # Blue dots for training loss, solid red line for validation loss
    ax.plot(losses['loss'], 'bo', label='Training loss')
    ax.plot(losses['val_loss'], 'r', label='Validation loss')

    ax.set_title('Training and Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()
