In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import joblib


In [2]:
X_train = pd.read_csv(r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\data\processed\X_train.csv")
y_train = pd.read_csv(r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\data\processed\y_train.csv").values.ravel()

Random Forest Hyperparameter Tuning

In [5]:
rf = RandomForestClassifier(random_state=42)

rf_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'bootstrap': [True, False]
}

rf_grid = GridSearchCV(
    estimator=rf,
    param_grid=rf_param_grid,
    cv=5,
    scoring='roc_auc',
    verbose=1,
    n_jobs=-1
)

rf_grid.fit(X_train, y_train)

print("✅ Best RF Params:", rf_grid.best_params_)
print("✅ Best RF AUC:", rf_grid.best_score_)
joblib.dump(rf_grid.best_estimator_, r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\models\best_rf_model.pkl")


Fitting 5 folds for each of 48 candidates, totalling 240 fits
✅ Best RF Params: {'bootstrap': False, 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
✅ Best RF AUC: 0.9351493646015416


['C:\\Users\\theow\\Documents\\Project\\Explainable-Loan-Default\\models\\best_rf_model.pkl']

XGBoost Hyperparameter Tuning

In [6]:
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

xgb_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.7, 1],
    'colsample_bytree': [0.7, 1]
}

xgb_grid = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_param_grid,
    cv=5,
    scoring='roc_auc',
    verbose=1,
    n_jobs=-1
)

xgb_grid.fit(X_train, y_train)

print("✅ Best XGB Params:", xgb_grid.best_params_)
print("✅ Best XGB AUC:", xgb_grid.best_score_)
joblib.dump(xgb_grid.best_estimator_, r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\models\best_xgb_model.pkl")


Fitting 5 folds for each of 72 candidates, totalling 360 fits
✅ Best XGB Params: {'colsample_bytree': 1, 'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 200, 'subsample': 1}
✅ Best XGB AUC: 0.9481140834519085


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['C:\\Users\\theow\\Documents\\Project\\Explainable-Loan-Default\\models\\best_xgb_model.pkl']

In [9]:
pd.DataFrame(rf_grid.cv_results_).to_csv(r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\results\rf_grid_results.csv", index=False)
pd.DataFrame(xgb_grid.cv_results_).to_csv(r"C:\Users\theow\Documents\Project\Explainable-Loan-Default\results\xgb_grid_results.csv", index=False)