In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from sklearn.model_selection import RandomizedSearchCV

In [12]:
# Load the Iris dataset and pull out the feature matrix (X) and class labels (y).
data = load_iris()
X, y = data.data, data.target

In [13]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Grid Search

#### Define the model and the grid of hyperparameters

In [5]:
# Seed the forest so repeated runs of the search produce the same scores;
# an unseeded forest draws different bootstrap samples and feature subsets each run.
model = RandomForestClassifier(random_state=42)

# Candidate values for each hyperparameter: 3 * 4 * 3 = 36 combinations in total.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}


In [6]:
# Exhaustive search over param_grid, scoring each combination by 5-fold CV accuracy.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

In [7]:
# Report the winning hyperparameter combination and its cross-validation score.
best_grid_params = grid_search.best_params_
best_grid_score = grid_search.best_score_
print("Best Parameters:", best_grid_params)
print("Best Cross-Validation Score:", best_grid_score)

Best Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 100}
Best Cross-Validation Score: 0.9583333333333334


In [8]:
# Evaluate the best estimator found by the grid search on the held-out test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# Reuse the predictions computed above rather than calling .score(), which would
# run predict() a second time. Accuracy is the fraction of exact label matches.
print("Test Set Accuracy:", float((y_pred == y_test).mean()))

Test Set Accuracy: 1.0


## Random Search

#### Define the model and the hyperparameter search space

In [10]:
# Fresh, seeded estimator for the randomized search so its scores are repeatable.
model = RandomForestClassifier(random_state=42)

# Candidate values for each hyperparameter (4 * 5 * 3 * 2 = 120 possible combinations);
# the randomized search evaluates only a sample of them.
param_dist = {
    'n_estimators': [50, 100, 200, 500],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'bootstrap': [True, False]
}

In [11]:
# Evaluate 10 randomly drawn combinations from param_dist with 5-fold CV accuracy;
# random_state fixes which combinations are drawn.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=10,
    scoring='accuracy',
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

In [12]:
# Report the best sampled combination and its cross-validation score.
best_random_params = random_search.best_params_
best_random_score = random_search.best_score_
print("Best Parameters:", best_random_params)
print("Best Cross-Validation Score:", best_random_score)

Best Parameters: {'n_estimators': 50, 'min_samples_split': 10, 'max_depth': 30, 'bootstrap': True}
Best Cross-Validation Score: 0.95


In [13]:
# Score the best estimator from the randomized search on the held-out test set.
best_model = random_search.best_estimator_
random_search_test_accuracy = best_model.score(X_test, y_test)
print("Test Set Accuracy:", random_search_test_accuracy)

Test Set Accuracy: 1.0


## Bayesian Optimization

In [15]:
pip install optuna

Collecting optuna
  Obtaining dependency information for optuna from https://files.pythonhosted.org/packages/15/da/68883911855d8b4d521f9a370e4e6aab8232b91c1d8d5a8348c4680c6642/optuna-3.6.1-py3-none-any.whl.metadata
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
   ---------------------------------------- 0.0/380.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/380.1 kB ? eta -:--:--
   - -------------------------------------- 10.2/380.1 kB ? eta -:--:--
   --- ----------------------------------- 30.7/380.1 kB 435.7 kB/s eta 0:00:01
   ---- ---------------------------------- 41.0/380.1 kB 393.8 kB/s eta 0:00:01
   ------- ------------------------------- 71.7/380.1 kB 491.5 kB/s eta 0:00:01
   ---------- --------------------------- 102.4/380.1 kB 535.8 kB/s eta 0:00:01
   ------------ ------------------------- 122.9/380.1 kB 554.9 kB/s eta 0:00:01
   -------------- ----------------------- 143.4/380.1 kB 53

In [11]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split

In [8]:
def objective(trial, X_train, y_train, cv=5, random_state=42):
    """Return the mean cross-validated accuracy of a random forest for one trial.

    NOTE(review): `objective` is defined again in a later cell of this notebook;
    the later definition is the one in effect when the study runs. Consider
    keeping only one of the two.

    Parameters
    ----------
    trial
        Object exposing ``suggest_int(name, low, high)``; used to draw
        ``n_estimators`` (50-500), ``max_depth`` (10-50) and
        ``min_samples_split`` (2-10).
    X_train, y_train
        Training features and labels passed straight to ``cross_val_score``.
    cv
        Number of cross-validation folds (default 5, the original value).
    random_state
        Seed for the forest. A fixed seed makes the objective deterministic, so
        the same hyperparameters always receive the same score; pass ``None``
        to restore unseeded behaviour.

    Returns
    -------
    The mean of the per-fold accuracy scores.
    """
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=random_state,
    )
    fold_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='accuracy')
    return fold_scores.mean()


In [14]:
# Define the objective function with X_train and y_train as parameters
def objective(trial, X_train, y_train, cv=5, random_state=42):
    """Return the mean cross-validated accuracy of a random forest for one trial.

    NOTE(review): an earlier cell of this notebook also defines `objective`;
    this later definition overrides it. Consider keeping only one of the two.

    Parameters
    ----------
    trial
        Object exposing ``suggest_int(name, low, high)``; used to draw
        ``n_estimators`` (50-500), ``max_depth`` (10-50) and
        ``min_samples_split`` (2-10).
    X_train, y_train
        Training features and labels passed straight to ``cross_val_score``.
    cv
        Number of cross-validation folds (default 5, the original value).
    random_state
        Seed for the forest. A fixed seed makes the objective deterministic, so
        the same hyperparameters always receive the same score; pass ``None``
        to restore unseeded behaviour.

    Returns
    -------
    The mean of the per-fold accuracy scores.
    """
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=random_state,
    )
    fold_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='accuracy')
    return fold_scores.mean()


In [15]:
# Create the study with a seeded TPE sampler so the sequence of suggested
# hyperparameters is reproducible, then run 20 trials. X_train and y_train are
# bound to the objective through the lambda because Optuna calls it with the
# trial only.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=20)

[I 2024-08-14 16:35:12,154] A new study created in memory with name: no-name-23203c85-d397-4137-8c64-8f6bd972adbf
[I 2024-08-14 16:35:13,855] Trial 0 finished with value: 0.95 and parameters: {'n_estimators': 237, 'max_depth': 49, 'min_samples_split': 9}. Best is trial 0 with value: 0.95.
[I 2024-08-14 16:35:14,437] Trial 1 finished with value: 0.95 and parameters: {'n_estimators': 71, 'max_depth': 10, 'min_samples_split': 4}. Best is trial 0 with value: 0.95.
[I 2024-08-14 16:35:17,252] Trial 2 finished with value: 0.95 and parameters: {'n_estimators': 381, 'max_depth': 34, 'min_samples_split': 5}. Best is trial 0 with value: 0.95.
[I 2024-08-14 16:35:20,629] Trial 3 finished with value: 0.95 and parameters: {'n_estimators': 417, 'max_depth': 20, 'min_samples_split': 4}. Best is trial 0 with value: 0.95.
[I 2024-08-14 16:35:22,267] Trial 4 finished with value: 0.95 and parameters: {'n_estimators': 228, 'max_depth': 34, 'min_samples_split': 5}. Best is trial 0 with value: 0.95.
[I 2024

In [16]:
# Report the best trial's hyperparameters together with its objective value
# (the mean CV accuracy), mirroring what the grid and random search sections print.
print("Best Parameters:", study.best_params)
print("Best Cross-Validation Score:", study.best_value)

Best Parameters: {'n_estimators': 477, 'max_depth': 35, 'min_samples_split': 4}


In [17]:
# Train the final model on the training split with the best hyperparameters found.
# random_state is fixed so the reported test accuracy is repeatable across runs.
best_model = RandomForestClassifier(**study.best_params, random_state=42)
best_model.fit(X_train, y_train)
print("Test Set Accuracy:", best_model.score(X_test, y_test))

Test Set Accuracy: 1.0
