---
# 05 - Hyperparameter Tuning

In [6]:
# Import necessary libraries
import joblib
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [7]:
# Read the persisted no-leakage artifacts via joblib.
# NOTE(review): joblib.load unpickles its input, so only point it at trusted files.

# Two-item pickle: features and target (presumably the full, unsplit data — confirm)
X_nl, y = joblib.load("../data/interim/Xy_nl.pkl")

# Four-item pickle holding the train/test partition
nl_split = joblib.load("../data/interim/nl_split.pkl")
X_train_nl, X_test_nl, y_train_nl, y_test_nl = nl_split

# Numeric column names; per the original note, 'scored_by' is excluded
numeric_cols_nl = joblib.load("../data/interim/numeric_cols_nl.pkl")

# Previously saved pipeline for the no-leakage features
pipeline_rf_nl = joblib.load("../models/pipeline_rf_nl.pkl")

In [8]:
# Search space: 3 x 3 x 2 = 18 combinations of the "regressor" step's settings.
# NOTE(review): the recorded run picked max_depth=10, n_estimators=50 and
# min_samples_split=5, each at an end of its list, so the grid may not
# bracket the optimum — consider widening it.
param_grid = {
    "regressor__n_estimators": [50, 100, 200],
    "regressor__max_depth": [10, 20, None],
    "regressor__min_samples_split": [2, 5],
}

# Exhaustive search scored by R² over 3 CV folds, with n_jobs=-1 for parallelism.
grid_search = GridSearchCV(
    estimator=pipeline_rf_nl,
    param_grid=param_grid,
    scoring="r2",
    cv=3,
    verbose=1,
    n_jobs=-1,
)

In [9]:
# Run the search on the training split (54 fits in the recorded run).
grid_search.fit(X_train_nl, y_train_nl)

# Report the winning combination, then keep the corresponding fitted estimator.
print("Best parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

Fitting 3 folds for each of 18 candidates, totalling 54 fits
Best parameters: {'regressor__max_depth': 10, 'regressor__min_samples_split': 5, 'regressor__n_estimators': 50}


In [10]:
def evaluate_model(name, y_true, y_pred):
    """Print MAE, RMSE and R² for a set of predictions.

    Args:
        name: Label placed in the report's header line.
        y_true: Reference target values.
        y_pred: Predicted values to compare against ``y_true``.

    Returns:
        None. The report is written to stdout, each metric with four
        decimals, followed by a 30-dash rule.
    """
    print(f"{name} Evaluation")

    mae = mean_absolute_error(y_true, y_pred)
    print(f"MAE  : {mae:.4f}")

    # RMSE is taken as the square root of the MSE.
    mse = mean_squared_error(y_true, y_pred)
    print(f"RMSE : {np.sqrt(mse):.4f}")

    r2 = r2_score(y_true, y_pred)
    print(f"R²   : {r2:.4f}")

    print("-" * 30)

# Score the tuned estimator's predictions for the test split
y_pred_best = best_model.predict(X_test_nl)
evaluate_model(
    name="Tuned RF (No scored_by)",
    y_true=y_test_nl,
    y_pred=y_pred_best,
)

Tuned RF (No scored_by) Evaluation
MAE  : 0.1120
RMSE : 0.5235
R²   : 0.9789
------------------------------


In [11]:
# Persist the tuned no-leakage estimator
joblib.dump(value=best_model, filename="../models/best_model_rf_nl.pkl")

# Also store the winning hyperparameters for documentation purposes;
# as the cell's last expression, this call's return value is what gets displayed.
joblib.dump(
    value=grid_search.best_params_,
    filename="../models/best_model_rf_nl_params.pkl",
)

['../models/best_model_rf_nl_params.pkl']