In [2]:
# Day 89 - Hyperparameter Tuning

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import optuna
import warnings
warnings.filterwarnings("ignore")

# ---------------- Dataset ----------------
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------- Baseline Model ----------------
base_model = RandomForestClassifier(random_state=42)
base_model.fit(X_train, y_train)
base_pred = base_model.predict(X_test)
print("Baseline Accuracy:", round(accuracy_score(y_test, base_pred), 3))

  from .autonotebook import tqdm as notebook_tqdm


Baseline Accuracy: 1.0


In [3]:
# ---------------- 1️⃣ Grid Search CV ----------------
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
grid_best = grid_search.best_estimator_
grid_pred = grid_best.predict(X_test)
print("\nGridSearchCV Best Params:", grid_search.best_params_)
print("GridSearchCV Accuracy:", round(accuracy_score(y_test, grid_pred), 3))


GridSearchCV Best Params: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 50}
GridSearchCV Accuracy: 1.0


In [5]:
# ---------------- 2️⃣ Randomized Search CV ----------------
from scipy.stats import randint

param_dist = {
    'n_estimators': randint(50, 300),
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': randint(2, 10)
}

random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42),
                                   param_distributions=param_dist,
                                   n_iter=10,
                                   cv=3,
                                   random_state=42,
                                   n_jobs=-1)
random_search.fit(X_train, y_train)
rand_best = random_search.best_estimator_
rand_pred = rand_best.predict(X_test)
print("\nRandomizedSearchCV Best Params:", random_search.best_params_)
print("RandomizedSearchCV Accuracy:", round(accuracy_score(y_test, rand_pred), 3))


RandomizedSearchCV Best Params: {'max_depth': None, 'min_samples_split': 6, 'n_estimators': 152}
RandomizedSearchCV Accuracy: 1.0


In [6]:
# ---------------- 3️⃣ Optuna Optimization ----------------
def objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    max_depth = trial.suggest_int("max_depth", 2, 20)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
    
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=42
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return accuracy_score(y_test, preds)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

optuna_best_params = study.best_params
print("\nOptuna Best Params:", optuna_best_params)

optuna_best_model = RandomForestClassifier(**optuna_best_params, random_state=42)
optuna_best_model.fit(X_train, y_train)
optuna_pred = optuna_best_model.predict(X_test)
print("Optuna Tuned Accuracy:", round(accuracy_score(y_test, optuna_pred), 3))


[I 2025-10-18 15:42:17,299] A new study created in memory with name: no-name-d42fa985-2f43-452c-8475-037af90d0247
[I 2025-10-18 15:42:17,427] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 76, 'max_depth': 6, 'min_samples_split': 7}. Best is trial 0 with value: 1.0.
[I 2025-10-18 15:42:17,813] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 257, 'max_depth': 15, 'min_samples_split': 6}. Best is trial 0 with value: 1.0.
[I 2025-10-18 15:42:17,986] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 102, 'max_depth': 8, 'min_samples_split': 4}. Best is trial 0 with value: 1.0.
[I 2025-10-18 15:42:18,094] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 62, 'max_depth': 20, 'min_samples_split': 8}. Best is trial 0 with value: 1.0.
[I 2025-10-18 15:42:18,294] Trial 4 finished with value: 1.0 and parameters: {'n_estimators': 94, 'max_depth': 9, 'min_samples_split': 8}. Best is trial 0 with value: 1.0.
[I 2025-10-18 15:42:18


Optuna Best Params: {'n_estimators': 76, 'max_depth': 6, 'min_samples_split': 7}
Optuna Tuned Accuracy: 1.0
