In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
df = pd.read_csv('model_data.csv')

In [3]:
df['Financial_Stress'] = df['Financial_Stress'].map({
    "Low Risk": 0,
    "Medium Risk":1,
    "Risky":2
})

In [4]:
X = df.drop(columns=['Financial_Stress'])
Y = df['Financial_Stress']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.17, random_state=42) 

In [6]:
X_train

Unnamed: 0,monthly_income,num_active_bnpl_plans,total_bnpl_amount,average_emi,bnpl_income_ratio,Emi_delayed
3221,132776,1,14580,14580.0,0.109809,0
1618,78044,1,42384,42384.0,0.543078,1
1178,157355,3,38859,12953.0,0.082317,0
3381,110918,2,11180,5590.0,0.050398,0
2462,163279,2,15240,7620.0,0.046669,0
...,...,...,...,...,...,...
1130,128204,1,23876,23876.0,0.186234,0
1294,177462,1,18785,18785.0,0.105854,0
860,43899,2,11004,5502.0,0.125333,0
3507,92870,1,17225,17225.0,0.185474,0


In [8]:
y_train 

3221    0
1618    1
1178    0
3381    0
2462    0
       ..
1130    0
1294    0
860     0
3507    0
3174    0
Name: Financial_Stress, Length: 2913, dtype: int64

In [17]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}
rf = RandomForestClassifier()
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

grid_search.fit(X_train, y_train)

best_rf = grid_search.best_estimator_


print("Best Parameters:")
print(grid_search.best_params_)
y_pred = best_rf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Best Parameters:
{'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Accuracy: 1.0

Confusion Matrix:
 [[452   0   0]
 [  0  72   0]
 [  0   0  73]]


In [22]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_prd = knn.predict(X_test)
acc = accuracy_score(y_test, y_prd)
acc

0.9698492462311558

In [24]:
import joblib
joblib.dump(knn, 'model.pkl')

['model.pkl']