In [1]:
#!pip install scikit-learn hyperopt


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.pyll.base import scope

In [3]:
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
# Grid Search Cross-Validation
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
print(f'Grid Search Best Hyperparameters: {grid_search.best_params_}')
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
print(f'Grid Search Accuracy: {accuracy_score(y_test, y_pred):.2f}')


Fitting 5 folds for each of 108 candidates, totalling 540 fits
Grid Search Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
Grid Search Accuracy: 1.00


In [5]:
# Random Search Cross-Validation
param_dist = {
    'n_estimators': [50, 100, 200, 300, 400],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10, 15],
    'min_samples_leaf': [1, 2, 4, 6]
}
random_search = RandomizedSearchCV(estimator=rf, param_distributions=param_dist, n_iter=50, cv=5, n_jobs=-1, verbose=2, random_state=42)
random_search.fit(X_train, y_train)
print(f'Random Search Best Hyperparameters: {random_search.best_params_}')
best_rf_random = random_search.best_estimator_
y_pred_random = best_rf_random.predict(X_test)
print(f'Random Search Accuracy: {accuracy_score(y_test, y_pred_random):.2f}')


Fitting 5 folds for each of 50 candidates, totalling 250 fits
Random Search Best Hyperparameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 30}
Random Search Accuracy: 1.00


In [6]:
# Bayesian Optimization with Hyperopt
space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 50, 300, 50)),
    'max_depth': scope.int(hp.quniform('max_depth', 5, 50, 5)),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 20, 1)),
    'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 10, 1))
}
def objective(params):
    clf = RandomForestClassifier(**params, random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return {'loss': -accuracy, 'status': STATUS_OK}
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50, trials=trials, rstate=np.random.default_rng(42))
print(f'Bayesian Optimization Best Hyperparameters: {best}')
best_rf_bayes = RandomForestClassifier(n_estimators=int(best['n_estimators']),
                                       max_depth=int(best['max_depth']),
                                       min_samples_split=int(best['min_samples_split']),
                                       min_samples_leaf=int(best['min_samples_leaf']),
                                       random_state=42)
best_rf_bayes.fit(X_train, y_train)
y_pred_bayes = best_rf_bayes.predict(X_test)
print(f'Bayesian Optimization Accuracy: {accuracy_score(y_test, y_pred_bayes):.2f}')


100%|███████████████████████| 50/50 [00:07<00:00,  6.51trial/s, best loss: -1.0]
Bayesian Optimization Best Hyperparameters: {'max_depth': 35.0, 'min_samples_leaf': 9.0, 'min_samples_split': 13.0, 'n_estimators': 250.0}
Bayesian Optimization Accuracy: 1.00
