In [None]:
import os
import shutil

# Place the uploaded Kaggle API token under /root/.kaggle, the location this
# notebook uses for the `kaggle` command-line credentials.
os.makedirs("/root/.kaggle", exist_ok=True)

# Move the token only while it is still in the working directory, so that
# re-running this cell after a successful first run does not fail.
if os.path.exists("kaggle.json"):
    shutil.move("kaggle.json", "/root/.kaggle/kaggle.json")

# The mode has to be an octal literal: decimal 600 equals 0o1130, which is not
# owner read/write. 0o600 restricts the token to its owner.
os.chmod("/root/.kaggle/kaggle.json", 0o600)

In [None]:
# Download the Lending Club loan dataset archive from Kaggle (shell command).
!kaggle datasets download -d adarshsng/lending-club-loan-data-csv

Dataset URL: https://www.kaggle.com/datasets/adarshsng/lending-club-loan-data-csv
License(s): DbCL-1.0
Downloading lending-club-loan-data-csv.zip to /content
 82% 277M/339M [00:00<00:00, 836MB/s] 
100% 339M/339M [00:00<00:00, 769MB/s]


In [None]:
# Extract the archive. -o overwrites existing files without prompting, so a
# re-run of the notebook does not stop at an interactive "replace?" question.
!unzip -o lending-club-loan-data-csv.zip

Archive:  lending-club-loan-data-csv.zip
  inflating: LCDataDictionary.xlsx   
  inflating: loan.csv                


In [None]:
# Use the %pip magic rather than !pip so the package is installed into the
# environment of the running kernel.
%pip install xgboost



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

In [None]:
# Load the extracted loan file. low_memory=False turns off pandas' chunked
# parsing, which otherwise may infer mixed dtypes within a column.
df = pd.read_csv("loan.csv", low_memory=False)

# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,2500,2500,2500.0,36 months,13.56,84.92,C,C1,...,,,Cash,N,,,,,,
1,,,30000,30000,30000.0,60 months,18.94,777.23,D,D2,...,,,Cash,N,,,,,,
2,,,5000,5000,5000.0,36 months,17.97,180.69,D,D1,...,,,Cash,N,,,,,,
3,,,4000,4000,4000.0,36 months,18.94,146.51,D,D2,...,,,Cash,N,,,,,,
4,,,30000,30000,30000.0,60 months,16.14,731.78,C,C4,...,,,Cash,N,,,,,,


In [None]:
# Raw loan attributes kept for the stress-score model; every other column of
# the loaded file is discarded.
columns_needed = [
    "loan_amnt",
    "term",
    "int_rate",
    "installment",
    "annual_inc",
    "dti",
    "emp_length",
    "home_ownership"
]

# Select the columns, drop rows with any missing value, and take an explicit
# copy. Mutating a column slice in place (dropna(inplace=True)) leaves pandas
# treating the frame as a view of the original, and the column assignments in
# the following cells then raise SettingWithCopyWarning.
df = df[columns_needed].dropna().copy()

In [None]:
# Convert values such as "36 months" to the integer number of months.
# The pattern is a raw string so "\d" is not read as an (invalid) string
# escape, and expand=False makes str.extract return a Series rather than a
# one-column DataFrame.
df["term"] = df["term"].str.extract(r"(\d+)", expand=False).astype(int)

  df["term"] = df["term"].str.extract('(\d+)').astype(int)


In [None]:
# Keep the first run of digits from the employment-length text as an integer;
# values without any digit become 0.
# The pattern is a raw string so "\d" is not read as an (invalid) string
# escape, and expand=False makes str.extract return a Series rather than a
# one-column DataFrame.
# NOTE(review): a value like "< 1 year" would be encoded as 1, the same as
# "1 year" — confirm that is acceptable.
df["emp_length"] = df["emp_length"].str.extract(r"(\d+)", expand=False).fillna(0).astype(int)

  df["emp_length"] = df["emp_length"].str.extract('(\d+)').fillna(0).astype(int)


In [None]:
# One-hot encode home_ownership. drop_first=True omits the first category
# level, so that level is represented by all indicator columns being False.
df = pd.get_dummies(df, columns=["home_ownership"], drop_first=True)

In [None]:
# Derive monthly income from the annual figure.
df["monthly_income"] = df["annual_inc"] / 12

In [None]:
# Share of monthly income consumed by the loan installment.
# NOTE(review): a row with annual_inc == 0 would produce inf here — confirm
# such rows cannot reach this point, or filter them beforehand.
df["emi_ratio"] = df["installment"] / df["monthly_income"]

In [None]:
# Hand-built stress score: a weighted sum of the installment-to-income ratio
# (weight 50), dti (weight 0.5) and the interest rate (weight 0.5).
df["stress_score"] = (
    df["emi_ratio"].mul(50)
    .add(df["dti"].mul(0.5))
    .add(df["int_rate"].mul(0.5))
)

In [None]:
# Bound the score to the closed range [0, 100].
df["stress_score"] = df["stress_score"].clip(lower=0, upper=100)

In [None]:
# The engineered score is the regression target; all remaining columns are
# the features.
# NOTE(review): the target is computed directly from emi_ratio, dti and
# int_rate, which all remain in X, so the model is fitting a known formula —
# confirm this is intended.
y = df["stress_score"]
X = df.drop(columns=["stress_score"])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Gradient-boosted regressor for the stress score. The hyperparameters are
# fixed by hand (no search is performed in this notebook); random_state is set
# so the row/column subsampling is repeatable.
model = XGBRegressor(
    n_estimators=400,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42
)

# Fit on the training split only.
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Report mean squared error and the coefficient of determination on the test split.
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))

MSE: 0.4541238574685528
R2 Score: 0.9907377454644142


In [None]:
# Build a separate frame holding the test features plus the model's
# prediction, leaving X_test itself unchanged.
X_test_copy = X_test.assign(predicted_stress=y_pred)

In [None]:
def classify_stress(score):
    """Translate a numeric stress score into a risk label.

    Each band is bounded by a strict upper limit: a score below 30 is
    "Safe", below 60 is "Moderate", below 80 is "Risky"; any score that
    falls under none of these limits is "Dangerous".
    """
    bands = ((30, "Safe"), (60, "Moderate"), (80, "Risky"))
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "Dangerous"

# Label every predicted score with its risk band.
predicted_scores = X_test_copy["predicted_stress"]
X_test_copy["stress_category"] = predicted_scores.map(classify_stress)

# Show a few rows with the prediction and its category.
X_test_copy.head()

Unnamed: 0,loan_amnt,term,int_rate,installment,annual_inc,dti,emp_length,home_ownership_MORTGAGE,home_ownership_NONE,home_ownership_OTHER,home_ownership_OWN,home_ownership_RENT,monthly_income,emi_ratio,predicted_stress,stress_category
560600,12000,36,8.99,381.55,102000.0,8.12,10,False,False,False,True,False,8500.0,0.044888,10.747007,Safe
727405,15000,36,14.46,516.03,67000.0,10.3,9,False,False,False,True,False,5583.333333,0.092423,17.107695,Safe
2181542,6000,36,10.91,196.18,91733.0,24.42,10,True,False,False,False,False,7644.416667,0.025663,18.976519,Safe
1903056,2000,36,14.99,69.33,39000.0,4.95,4,False,False,False,False,True,3250.0,0.021332,11.033823,Safe
529252,13000,36,14.49,447.41,55000.0,17.43,2,False,False,False,False,True,4583.333333,0.097617,20.784531,Safe


In [None]:
# Use the %pip magic rather than !pip so the package is installed into the
# environment of the running kernel.
%pip install joblib



In [None]:
import joblib

# Persist the fitted regressor to emi_stress_model.pkl in the working directory.
# NOTE(review): joblib files are pickle-based; load this file only from a
# trusted source.
joblib.dump(model, "emi_stress_model.pkl")

['emi_stress_model.pkl']

In [None]:
# Save the feature column names, in the order used for training, next to the
# model file (presumably so inference code can rebuild the same column layout).
joblib.dump(X.columns.tolist(), "emi_features.pkl")

['emi_features.pkl']