In [None]:
# Import data handling and visualization libraries
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt

In [None]:
# Import the model, the train/test split helper, and the evaluation metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# SMOTE -- Synthetic Minority Over-sampling Technique
from imblearn.over_sampling import SMOTE

In [None]:
# Location of the raw CSV, relative to the notebook's working directory
DATA_PATH = 'Data/Loan_Default.csv'

# Read the file into a DataFrame and preview its first rows
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Columns used as model inputs, kept in one list so the selection is easy to audit
feature_columns = [
    'Age',
    'Income',
    'LoanAmount',
    'CreditScore',
    'MonthsEmployed',
    'NumCreditLines',
    'InterestRate',
    'LoanTerm',
    'DTIRatio',
]

# Feature matrix (DataFrame) and target vector (Series)
X = df[feature_columns]
y = df['Default']

In [None]:
# Report missing-value counts: one count per column for the feature frame,
# then a single total for the target series
for frame in (X, y):
    print(frame.isnull().sum())

In [None]:
# Train/test split: 80% train, 20% test.
# stratify=y keeps the proportion of each `Default` class the same in both
# partitions, so the hold-out metrics are computed on a representative class mix
# (the target is imbalanced enough that SMOTE is applied below).
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Oversample the minority class on the training partition only; the test
# partition is left untouched so evaluation is done on real, un-synthesized rows.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Sanity check: shape of the resampled training features vs. the test features
print(X_train_res.shape)
print(X_test.shape)

In [None]:
# Fit a LogisticRegression classifier on the SMOTE-resampled training data.
# max_iter is raised above scikit-learn's default of 100: the features are fed
# in unscaled, so the default solver is likely to stop before converging
# (ConvergenceWarning). NOTE(review): scaling the features first would be the
# more thorough fix — confirm whether that is wanted here.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_res, y_train_res)

In [None]:
# Check model performance on the held-out test partition
y_pred = model.predict(X_test)

# Accuracy alone is misleading on an imbalanced target, so precision, recall
# and their harmonic mean (F1) are reported alongside it.
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('precision Score: ', precision_score(y_test, y_pred))
print('recall Score: ', recall_score(y_test, y_pred))
print('f1 Score: ', f1_score(y_test, y_pred))

In [None]:
# Confusion matrix of true vs. predicted labels on the test partition
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

# Draw the counts as an annotated heatmap on an explicitly created axes
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()