In [None]:
# Load Data & Imports
import json
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, GridSearchCV

df = pd.read_csv("../data/processed/clean_telco_churn.csv")

# 'Churn' is the prediction target; every other column is used as a feature.
y = df['Churn']
X = df.drop('Churn', axis=1)

# Hold out 20% of the rows for the final evaluation. Stratifying on y keeps the
# class proportions the same in both parts; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Define Parameter Grid
# Candidate values per RandomForestClassifier hyperparameter. An exhaustive grid
# search over these tries every combination: 2 * 3 * 2 * 2 = 24 candidates.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[5, 10, None],  # None leaves tree depth uncapped
    min_samples_split=[20, 50],
    min_samples_leaf=[10, 20],
)

In [None]:
# Initialize GridSearch
# Parallelise at one level only. GridSearchCV already spreads the candidate/fold
# fits over every core (n_jobs=-1), so each individual forest is fitted
# single-threaded. Asking for all cores at both levels makes every search worker
# spawn its own full set of threads and oversubscribes the CPU.
# n_jobs only affects speed: with random_state fixed, the fitted trees are the same.
rf = RandomForestClassifier(
    class_weight="balanced",  # weight classes inversely to their frequency in y
    random_state=42,
    n_jobs=1
)

grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="recall",  # rank candidates by mean cross-validated recall
    cv=5,
    n_jobs=-1,
    verbose=1
)

In [None]:
# Run the search on the training split only; the held-out test split is not
# touched until the final evaluation.
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Inspect Best Parameters
# Parameter combination with the best mean cross-validated score. The search is
# configured with scoring="recall", so "best" here means highest recall.
grid_search.best_params_

In [None]:
# Evaluate Tuned Model
# best_estimator_ is the winning candidate refit on the full training split
# (GridSearchCV refits by default).
best_rf = grid_search.best_estimator_

# Column 1 of predict_proba (the second entry of classes_) is the score used for
# ROC AUC; the hard labels feed the threshold-based metrics.
y_prob = best_rf.predict_proba(X_test)[:, 1]
y_pred = best_rf.predict(X_test)

tuned_results = {
    "model_name": "Random Forest (Tuned)",
    "roc_auc": roc_auc_score(y_test, y_prob),
}
# Added in this order so the resulting dict keeps the same key layout.
tuned_results.update(
    (metric_name, metric_fn(y_test, y_pred))
    for metric_name, metric_fn in (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1_score", f1_score),
    )
)

tuned_results

In [None]:
# Save Tuned Results to JSON
tuned_results_path = Path("../results/random_forest_tuned.json")

# Create ../results on demand: opening the file for writing raises
# FileNotFoundError when the directory does not exist yet (e.g. a fresh checkout).
tuned_results_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding so the file is written the same way on every platform.
with tuned_results_path.open("w", encoding="utf-8") as f:
    json.dump(tuned_results, f, indent=4)

In [None]:
# Compare Old vs Tuned
# Baseline first, tuned second, so the rows read as before -> after.
files = [
    "../results/random_forest.json",
    "../results/random_forest_tuned.json"
]

# One metrics dict per file. The comprehension keeps its loop variable out of
# the notebook namespace, and UTF-8 is stated explicitly so decoding does not
# depend on the platform default.
comparison = [
    json.loads(Path(file).read_text(encoding="utf-8")) for file in files
]

pd.DataFrame(comparison)

## 📌 Hyperparameter Tuning Summary

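Hyperparameter tuning improved the Random Forest model by searching over the number of trees, tree depth, and minimum-sample constraints, using 5-fold cross-validation and selecting the combination with the highest recall.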

The tuned model achieved higher recall while maintaining stable precision, making it more suitable for the churn prediction business objective.