In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score


In [20]:
# Read the raw dataset into a DataFrame.
# NOTE(review): the absolute '/content/...' location looks environment-specific
# (e.g. a hosted notebook runtime) — confirm before running elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/maindataset.csv')


In [21]:
# Remove the 'ID' column when it exists; errors='ignore' makes this a no-op
# if the column is absent, so the cell can be re-run safely.
df = df.drop(columns='ID', errors='ignore')

In [23]:
# Separate the label column from the predictors.
# 'dlq_2yrs' is the target (presumably a two-year delinquency flag — confirm
# against the data dictionary); every other column is used as a feature.
y = df['dlq_2yrs']
X = df.drop(columns='dlq_2yrs')


In [24]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance of the two partitions matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
# Standardise the features. The scaler is fitted on the training partition
# only and then applied unchanged to both partitions, so no test-set
# statistics leak into the transform.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [26]:
# Logistic regression trained on the standardised features, with balanced
# class weights and a raised iteration cap.
logreg = LogisticRegression(class_weight='balanced', max_iter=1000)
logreg.fit(X_train_scaled, y_train)

# Hard labels feed accuracy / classification report; the second column of
# predict_proba (the second entry of logreg.classes_) feeds ROC-AUC.
y_pred_logreg = logreg.predict(X_test_scaled)
logreg_scores = logreg.predict_proba(X_test_scaled)[:, 1]

logreg_accuracy = accuracy_score(y_test, y_pred_logreg)
logreg_report = classification_report(y_test, y_pred_logreg)
logreg_auc = roc_auc_score(y_test, logreg_scores)

print("Logistic Regression")
print("Accuracy:", logreg_accuracy)
print(logreg_report)
print("ROC-AUC:", logreg_auc)

Logistic Regression
Accuracy: 0.7262937481304218
              precision    recall  f1-score   support

           0       0.69      0.82      0.75      1671
           1       0.78      0.63      0.70      1672

    accuracy                           0.73      3343
   macro avg       0.73      0.73      0.72      3343
weighted avg       0.73      0.73      0.72      3343

ROC-AUC: 0.7932110961261485


In [27]:
# Decision tree baseline with balanced class weights and a fixed seed.
# It is fitted on the unscaled features (X_train), not the standardised ones.
dt = DecisionTreeClassifier(class_weight='balanced', random_state=42)
dt.fit(X_train, y_train)

# Hard labels feed accuracy / classification report; the second column of
# predict_proba (the second entry of dt.classes_) feeds ROC-AUC.
y_pred_dt = dt.predict(X_test)
dt_scores = dt.predict_proba(X_test)[:, 1]

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)
dt_auc = roc_auc_score(y_test, dt_scores)

print("Decision Tree")
print("Accuracy:", dt_accuracy)
print(dt_report)
print("ROC-AUC:", dt_auc)


Decision Tree
Accuracy: 0.6871073885731379
              precision    recall  f1-score   support

           0       0.68      0.69      0.69      1671
           1       0.69      0.68      0.69      1672

    accuracy                           0.69      3343
   macro avg       0.69      0.69      0.69      3343
weighted avg       0.69      0.69      0.69      3343

ROC-AUC: 0.6871093291413617


In [28]:
# Random forest with balanced class weights and a fixed seed.
# Like the decision tree, it is fitted on the unscaled features (X_train).
rf = RandomForestClassifier(class_weight='balanced', random_state=42)
rf.fit(X_train, y_train)

# Hard labels feed accuracy / classification report; the second column of
# predict_proba (the second entry of rf.classes_) feeds ROC-AUC.
y_pred_rf = rf.predict(X_test)
rf_scores = rf.predict_proba(X_test)[:, 1]

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)
rf_auc = roc_auc_score(y_test, rf_scores)

print("Random Forest")
print("Accuracy:", rf_accuracy)
print(rf_report)
print("ROC-AUC:", rf_auc)


Random Forest
Accuracy: 0.7684714328447503
              precision    recall  f1-score   support

           0       0.76      0.78      0.77      1671
           1       0.78      0.76      0.77      1672

    accuracy                           0.77      3343
   macro avg       0.77      0.77      0.77      3343
weighted avg       0.77      0.77      0.77      3343

ROC-AUC: 0.845386683617809
