In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import randint

In [2]:
# Load the Iris dataset and pull out the feature matrix and the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 1. Manual Tuning

In [5]:
# Hand-picked hyperparameter settings to compare one at a time
params = [
    dict(n_estimators=5, max_depth=3),
    dict(n_estimators=12, max_depth=5),
    dict(n_estimators=100, max_depth=None),
]

# Train one forest per setting and report its accuracy on the test split.
# NOTE: these are test-set scores, so picking the "best" row from this list
# would give an optimistic estimate of how that model generalizes.
for p in params:
    model = RandomForestClassifier(random_state=42, **p).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Params: {p}, Accuracy: {acc:.3f}")

Params: {'n_estimators': 5, 'max_depth': 3}, Accuracy: 0.967
Params: {'n_estimators': 12, 'max_depth': 5}, Accuracy: 1.000
Params: {'n_estimators': 100, 'max_depth': None}, Accuracy: 1.000


## 2. Grid Search

In [None]:
%%time

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, None],
    'min_samples_split': [2, 4, 6]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

# Fit the model
grid_search.fit(X_train, y_train)

# Best parameters and score
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

Best Parameters: {'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 50}
Best Cross-Validation Accuracy: 0.95
Test Accuracy: 1.000


## 3. Random Search

In [None]:
%%time

# Define parameter distributions
param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': [3, 5, 7, None],
    'min_samples_split': randint(2, 10)
}

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1
)

# Fit the model
random_search.fit(X_train, y_train)

# Results
print("Best Parameters (Random Search):", random_search.best_params_)
print("Best Cross-Validation Accuracy:", random_search.best_score_)

Best Parameters (Random Search): {'max_depth': 7, 'min_samples_split': 5, 'n_estimators': 142}
Best Cross-Validation Accuracy: 0.95
Test Accuracy: 1.000
CPU times: total: 500 ms
Wall time: 3.28 s


---
| Method | Description | Pros | Cons |
|--------|--------------|------|------|
| Manual Search | Try a few configurations manually | Simple | Not scalable |
| Grid Search | Tests all combinations in a grid | Exhaustive | Slow for large search spaces |
| Random Search | Tests random combinations | Fast and efficient | Might miss the best combination |

**Final Thoughts:**
- Use **GridSearchCV** for small, well-defined grids.
- Use **RandomizedSearchCV** for large search spaces or limited compute time.
- Combine tuning with **Cross-Validation** for reliable model evaluation.
