Question 4: Hyperparameter Tuning with RandomizedSearchCV<br>
Task: Use RandomizedSearchCV to optimize hyperparameters for a RandomForestClassifier.

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import classification_report

# Step 1: Load the Iris dataset and separate the feature matrix from the targets
data = load_iris()
X = data.data
y = data.target

# Step 2: Hold out 30% of the samples for testing (fixed seed keeps the split repeatable)
# NOTE(review): the split is not stratified, so per-class counts in the test set may be uneven.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 3: Candidate values per hyperparameter; the random search samples
# combinations from these lists instead of trying every one of them
param_dist = dict(
    n_estimators=[10, 50, 100, 150],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Step 4: Base estimator whose hyperparameters will be tuned (seeded for repeatability)
rf = RandomForestClassifier(random_state=42)

# Step 5: Configure the randomized hyperparameter search
random_search = RandomizedSearchCV(
    rf,
    param_dist,
    n_iter=20,           # number of random parameter combinations to try
    scoring='accuracy',  # metric used to compare candidates
    cv=5,                # 5-fold cross-validation for every candidate
    n_jobs=-1,           # run the fits in parallel
    random_state=42,     # fixed seed so the same candidates are drawn each run
    verbose=1,           # print a progress summary while fitting
)

# Step 6: Run the search using the training split only
random_search.fit(X_train, y_train)

# Step 7: Take the best estimator found by the search and score it on the held-out test split
best_model = random_search.best_estimator_
best_params = random_search.best_params_

y_pred = best_model.predict(X_test)
report = classification_report(y_test, y_pred)

# Report the winning hyperparameters, then the per-class test metrics
print("\nBest Parameters Found:", best_params)
print("\nClassification Report:\n", report)


Fitting 5 folds for each of 20 candidates, totalling 100 fits

Best Parameters Found: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 5, 'bootstrap': True}

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

