<a href="https://colab.research.google.com/github/Shadabur-Rahaman/30-days-ml-projects/blob/main/Day7_GridSearchCV_Hyperparameter_Tuning_Cleaned/notebooks/Day7_GridSearchCV_Hyperparameter_Tuning_Cleaned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔧 Day 7: Hyperparameter Tuning with GridSearchCV

Today we explore **Hyperparameter Tuning** using `GridSearchCV` on the Iris dataset with a **Random Forest Classifier**.

We'll:
- Use GridSearchCV to tune hyperparameters
- Evaluate the best model
- Understand the impact of tuning on performance


In [None]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# 📥 Load Dataset
# Fetch the Iris bunch from scikit-learn and wrap its arrays in pandas objects:
# the targets become a Series and the features become a DataFrame whose
# columns are labelled with the bunch's own feature names.
iris = load_iris()
y = pd.Series(data=iris.target)
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
X.head()  # last expression in the cell, so the first rows are displayed

In [None]:
# 🔍 Train-Test Split
# Hold out 20% of the rows for the final evaluation of the tuned model.
# `stratify=y` keeps the class proportions of `y` the same in the train and
# test portions, so the small hold-out set is not skewed towards one class.
# The fixed `random_state` makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# 🧪 Define Model and Hyperparameter Grid
# Candidate values for each hyperparameter; the full grid is every combination
# of these lists (3 x 4 x 3 = 36 settings).
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 3, 5, 10],
    min_samples_split=[2, 4, 6],
)

# Base estimator to tune; the seed is fixed so repeated runs build the same forest.
rf = RandomForestClassifier(random_state=42)

In [None]:
# 🔍 GridSearchCV
# Evaluate every combination in `param_grid` with 5-fold cross-validation on
# the training split, scoring each candidate by accuracy (n_jobs=-1 uses all
# available CPU cores).
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid,
                           cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)
results = pd.DataFrame(grid_search.cv_results_)

# `max_depth=None` is a missing value as far as pandas is concerned, and
# `pivot_table` drops rows whose index key is missing -- the "unlimited depth"
# row would silently vanish from the heatmap. Convert the depths to string
# labels first so every setting gets a row.
depth_labels = results['param_max_depth'].map(
    lambda depth: 'None' if pd.isna(depth) else str(depth)
)

# Each cell is the mean CV accuracy for one (max_depth, n_estimators) pair,
# averaged over all `min_samples_split` values (pivot_table's default
# aggregation is the mean).
heatmap_data = (
    results.assign(param_max_depth=depth_labels)
    .pivot_table(
        index='param_max_depth',
        columns='param_n_estimators',
        values='mean_test_score'
    )
    # String labels sort lexically ('10' before '3'); restore the order in
    # which the depths first appear in the results.
    .reindex(depth_labels.unique())
)

# Plotting heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(heatmap_data, annot=True, fmt=".3f", cmap="YlGnBu")
plt.title("GridSearchCV Accuracy Heatmap")
plt.xlabel("n_estimators")
plt.ylabel("max_depth")
plt.tight_layout()

# Save heatmap (before `plt.show()`, while the figure is still the current one)
plt.savefig("gridsearch_heatmap.png")
plt.show()

In [None]:
# Step 1: Get the results from grid search
results_df = pd.DataFrame(grid_search.cv_results_)

# Step 2: Create a readable string label for each parameter set
results_df['param_combo'] = results_df['params'].map(str)

# Step 3: Sort results by mean_test_score
# Many parameter sets can tie on accuracy; a stable sort keeps tied rows in
# their original order, so the "top N" selection is deterministic.
results_df = results_df.sort_values(by='mean_test_score', ascending=False, kind='stable')

# Step 4: Plot the top N (e.g., 10) parameter combinations
top_n = 10
plt.figure(figsize=(12, 6))
# Passing `palette` without `hue` is deprecated in recent seaborn releases, so
# the y variable is also assigned to `hue`. `dodge=False` keeps one full-width
# bar per parameter set instead of slots for every hue level.
ax = sns.barplot(data=results_df.head(top_n), x='mean_test_score', y='param_combo',
                 hue='param_combo', dodge=False, palette='viridis')
# Some seaborn versions add a legend when `hue` is set; it would only repeat
# the y-axis labels, so drop it if present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

plt.xlabel("Mean Accuracy (CV)")
plt.ylabel("Parameter Set")
plt.title("Top GridSearchCV Parameter Sets by Accuracy")
plt.tight_layout()

# Save plot (before `plt.show()`, while the figure is still the current one)
plt.savefig("accuracy_barplot.png")
plt.show()

In [None]:
# ✅ Evaluate Tuned Model
# `best_estimator_` is the model GridSearchCV selected; score it on the
# held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Report classes by name instead of bare integers.
# NOTE(review): assumes the targets are the integer positions 0..n-1 into
# `iris.target_names` -- confirm if the dataset loading cell changes.
class_ids = np.arange(len(iris.target_names))

# `sep=""` stops print() from inserting a space after the newline, which would
# shift the first line of the report/matrix out of alignment with the rest.
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids,
                          target_names=iris.target_names),
    sep="",
)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids), sep="")