### Step 1: Importing Required Libraries

In [1]:
# Import essential libraries for data processing and machine learning
import numpy as np  # For numerical operations
import pandas as pd  # For handling tabular data
from scipy.stats import randint  # Integer distribution sampled by RandomizedSearchCV
from sklearn.datasets import load_iris  # To load the Iris dataset
from sklearn.ensemble import RandomForestClassifier  # Random Forest model
from sklearn.metrics import accuracy_score, classification_report  # For evaluation
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV  # Tuning methods

### Step 2: Load and Prepare the Dataset

In [2]:
# Fetch the Iris dataset bundle
iris = load_iris()

# Unpack the feature matrix (X) and the class labels (y) from the bundle
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Step 3: Define the Model

In [3]:
# Base estimator shared by both hyperparameter searches below.
# Only the seed is fixed here; every other setting keeps its library default
# until a search overrides it.
model = RandomForestClassifier(
    random_state=42,
)

### Step 4: Hyperparameter Tuning Using GridSearchCV

**Define the Parameter Grid**

In [4]:
# Candidate values for the exhaustive search: 3 * 4 * 3 * 3 = 108 combinations
param_grid = dict(
    n_estimators=[10, 50, 100],        # how many trees to grow
    max_depth=[None, 10, 20, 30],      # depth cap per tree (None = no cap)
    min_samples_split=[2, 5, 10],      # samples a node needs before it may split
    min_samples_leaf=[1, 2, 4],        # samples every leaf must keep
)

**Perform Grid Search**

In [5]:
# Exhaustive search: every combination in param_grid is scored with
# 3-fold cross-validated accuracy, using all available CPU cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,  # log progress while fitting
)

# Run the search on the training split only; the test split stays untouched
grid_search.fit(X=X_train, y=y_train)

# Report the winning combination and its cross-validated accuracy
print("Best Parameters from GridSearchCV:", grid_search.best_params_)
print("Best Score from GridSearchCV:", grid_search.best_score_)

Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best Parameters from GridSearchCV: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}
Best Score from GridSearchCV: 0.9583333333333334


### Step 5: Hyperparameter Tuning Using RandomizedSearchCV

**Define the Parameter Distributions**

In [6]:
# Search space for RandomizedSearchCV: each entry is either a scipy distribution
# to draw from or a list of candidates to pick from.
# `randint` (scipy.stats) is imported in the Step 1 imports cell.
# NOTE: randint(low, high) draws integers from the half-open range [low, high),
# so the upper bound itself is never sampled.
param_dist = {
    'n_estimators': randint(10, 200),  # Number of trees: integers 10..199
    'max_depth': [None, 10, 20, 30],  # Use the same depth options as GridSearchCV
    'min_samples_split': randint(2, 10),  # Split threshold: integers 2..9
    'min_samples_leaf': randint(1, 10)  # Leaf size: integers 1..9
}

**Perform Randomized Search**

In [7]:
# Randomized search: 50 parameter settings are drawn from param_dist and each
# is scored with 3-fold cross-validated accuracy, using all available CPU cores.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,
    scoring='accuracy',
    cv=3,
    random_state=42,  # fixed seed so the sampled settings are repeatable
    n_jobs=-1,
    verbose=2,  # log progress while fitting
)

# Run the search on the training split only; the test split stays untouched
random_search.fit(X=X_train, y=y_train)

# Report the winning combination and its cross-validated accuracy
print("Best Parameters from RandomizedSearchCV:", random_search.best_params_)
print("Best Score from RandomizedSearchCV:", random_search.best_score_)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best Parameters from RandomizedSearchCV: {'max_depth': 10, 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 14}
Best Score from RandomizedSearchCV: 0.9666666666666667


### Step 6: Evaluate the Best Models

**GridSearchCV Evaluation**

In [8]:
# Best estimator selected by the grid search
best_model_grid = grid_search.best_estimator_

# Predict labels for the held-out test split
y_pred_grid = best_model_grid.predict(X=X_test)

# Print a per-class report comparing the predictions with the true test labels
print("\nGridSearchCV Classification Report:")
print(
    classification_report(y_true=y_test, y_pred=y_pred_grid)
)


GridSearchCV Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



**RandomizedSearchCV Evaluation**

In [9]:
# Best estimator selected by the randomized search
best_model_random = random_search.best_estimator_

# Predict labels for the held-out test split
y_pred_random = best_model_random.predict(X=X_test)

# Print a per-class report comparing the predictions with the true test labels
print("\nRandomizedSearchCV Classification Report:")
print(
    classification_report(y_true=y_test, y_pred=y_pred_random)
)


RandomizedSearchCV Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

