### STEP-1 Importing the required libraries

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [3]:
df = pd.read_excel(r'C:\Users\Olym\Downloads\Delinquency_prediction_dataset.xlsx')

### Step-2 Defining X (features) and y (target)


In [4]:
# Defining X (features) and y (target)
features = [
    'Age','Income','Credit_Score','Credit_Utilization','Missed_Payments',
    'Loan_Balance','Debt_to_Income_Ratio','Account_Tenure'
]

X = df[features]
y = df['Delinquent_Account']

In [5]:
df.head()

Unnamed: 0,Customer_ID,Age,Income,Credit_Score,Credit_Utilization,Missed_Payments,Delinquent_Account,Loan_Balance,Debt_to_Income_Ratio,Employment_Status,Account_Tenure,Credit_Card_Type,Location,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Unnamed: 19
0,CUST0001,56,165580,398.0,0.390502,3,0,16310.0,0.317396,Employed,18,Student,Los Angeles,Late,Late,Missed,Late,Missed,Late,Customer_ID
1,CUST0002,69,100999,493.0,0.312444,6,1,17401.0,0.196093,Self-Employed,0,Standard,Phoenix,Missed,Missed,Late,Missed,On-time,On-time,Age
2,CUST0003,46,188416,500.0,0.35993,0,0,13761.0,0.301655,Self-Employed,1,Platinum,Chicago,Missed,Late,Late,On-time,Missed,Late,Income
3,CUST0004,32,101672,413.0,0.3714,3,0,88778.0,0.264794,UnEmployed,15,Platinum,Phoenix,Late,Missed,Late,Missed,Late,Late,Credit_Score
4,CUST0005,60,38524,487.0,0.234716,2,0,13316.0,0.510583,Self-Employed,11,Standard,Phoenix,Missed,On-time,Missed,Late,Late,Late,Credit_Utilization


### STEP-3 Splitting Data into Train and Test

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

### STEP-4 Train the Decision Tree Model

In [7]:
model = DecisionTreeClassifier(
    max_depth=4,      # keeps the tree simple (prevents overfitting)
    random_state=42
)

model.fit(X_train, y_train)

### STEP-5 Make Predictions

In [8]:
predictions = model.predict(X_test)

In [13]:
print("\nðŸŒ³ Decision Tree Performance")

print("\nClassification Report:\n", classification_report(y_test, predictions))

print("Accuracy:", accuracy_score(y_test, predictions))

print("\nClassification Report:")
print(classification_report(y_test, predictions))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions))


ðŸŒ³ Decision Tree Performance

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.99      0.92        86
           1       0.00      0.00      0.00        14

    accuracy                           0.85       100
   macro avg       0.43      0.49      0.46       100
weighted avg       0.74      0.85      0.79       100

Accuracy: 0.85

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.99      0.92        86
           1       0.00      0.00      0.00        14

    accuracy                           0.85       100
   macro avg       0.43      0.49      0.46       100
weighted avg       0.74      0.85      0.79       100


Confusion Matrix:
[[85  1]
 [14  0]]


### **** Since the Decision tree failed to classify the Delinquent account, I'm checking with the Logistic Regression

In [20]:
from sklearn.linear_model import LogisticRegression

In [21]:
features = ['Credit_Utilization', 
            'Missed_Payments', 
            'Income', 
            'Debt_to_Income_Ratio', 
            'Account_Tenure']

X = df[features]
y = df['Delinquent_Account']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [23]:
# Scaling the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [24]:
lr = LogisticRegression(max_iter=1000, class_weight='balanced')
lr.fit(X_train_scaled, y_train)

# model = LogisticRegression(max_iter=1000)
# model.fit(X_train, y_train)

In [25]:
y_pred_lr = lr.predict(X_test_scaled)
y_prob_lr = lr.predict_proba(X_test_scaled)[:, 1]

In [27]:
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Precision:", precision_score(y_test, y_pred_lr))
print("Recall:", recall_score(y_test, y_pred_lr))
print("F1 Score:", f1_score(y_test, y_pred_lr))
print("AUC:", roc_auc_score(y_test, y_prob_lr))

print("\nClassification Report:\n", classification_report(y_test, y_pred_lr))

Accuracy: 0.43
Precision: 0.12280701754385964
Recall: 0.5
F1 Score: 0.19718309859154928
AUC: 0.4833887043189369

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.42      0.56        86
           1       0.12      0.50      0.20        14

    accuracy                           0.43       100
   macro avg       0.48      0.46      0.38       100
weighted avg       0.74      0.43      0.51       100

