In [1]:
# IMPORT REQUIRED LIBRARIES
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
# LOAD THE DATASET
df = pd.read_csv("Synthetic_Loan_Default.csv")
df.head(7)

Unnamed: 0,Customer_ID,Age,Annual_Income,Credit_Score,Loan_Amount,Loan_Term,Employment_Type,Marital_Status,Dependents,Has_Credit_Card,Previous_Default,Loan_Default
0,221958,32,91831,588,42548,36,Self-Employed,Married,4,No,No,0
1,771155,21,139503,571,41945,60,Unemployed,Divorced,0,No,No,1
2,231932,46,135760,365,11476,24,Self-Employed,Married,2,Yes,Yes,1
3,465838,34,121529,782,29565,36,Unemployed,Divorced,4,Yes,No,0
4,359178,58,147834,547,7382,48,Self-Employed,Single,2,Yes,Yes,1
5,744167,57,54084,455,27463,36,Salaried,Divorced,4,Yes,Yes,1
6,210268,31,140387,346,38402,12,Salaried,Divorced,4,Yes,No,0


In [6]:
# IDENTIFY target AND Feature
# Target variable
y = df["Loan_Default"]

# Feature variables
X = df.drop(columns=["Loan_Default", "Customer_ID"])

In [10]:
# Handle Categorical Data(Transformation)
X = pd.get_dummies(X, drop_first=True)
X.head(7)


Unnamed: 0,Age,Annual_Income,Credit_Score,Loan_Amount,Loan_Term,Dependents,Employment_Type_Self-Employed,Employment_Type_Unemployed,Marital_Status_Married,Marital_Status_Single,Has_Credit_Card_Yes,Previous_Default_Yes
0,32,91831,588,42548,36,4,True,False,True,False,False,False
1,21,139503,571,41945,60,0,False,True,False,False,False,False
2,46,135760,365,11476,24,2,True,False,True,False,True,True
3,34,121529,782,29565,36,4,False,True,False,False,True,False
4,58,147834,547,7382,48,2,True,False,False,True,True,True
5,57,54084,455,27463,36,4,False,False,False,False,True,True
6,31,140387,346,38402,12,4,False,False,False,False,True,False


In [12]:
# TRAIN_TEST_SPLIT
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [13]:
#Feature Scaling (Standardization)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Train ML Models
 #1.LOGISTIC REGRESSION
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)



In [17]:
# Prediction and Model Accuracy
lr_pred = lr.predict(X_test_scaled)
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_pred)*100)

Logistic Regression Accuracy: 83.33333333333334


In [18]:
#2.DECISIONTREE CLASSIFIER
dt = DecisionTreeClassifier()
dt.fit(X_train_scaled, y_train)



In [19]:
# Prediction and Model Accuracy
dt_pred = dt.predict(X_test_scaled)
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred)*100)

Decision Tree Accuracy: 85.0


In [20]:
#3.RANDOMFOREST CLASSIFIER
rf=RandomForestClassifier()
rf.fit(X_train_scaled, y_train)



In [21]:
# # Prediction and Model Accuracy
rf_pred = rf.predict(X_test_scaled)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred)*100)

Random Forest Accuracy: 89.16666666666667


In [23]:
# classification report
print(classification_report(y_test, rf_pred))


              precision    recall  f1-score   support

           0       0.83      0.93      0.88       102
           1       0.94      0.86      0.90       138

    accuracy                           0.89       240
   macro avg       0.89      0.90      0.89       240
weighted avg       0.90      0.89      0.89       240



                     | Model               | Accuracy  |
                     | ------------------- | --------- |
                     | Logistic Regression | 83.3%     |
                     | Decision Tree       | 85.0%     |
                     | **Random Forest**   | **89.16%**|
