In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Load the Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5, 10]
}

# Perform grid search
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

# Evaluate the model on the test set
best_model = grid_search.best_estimator_
accuracy = best_model.score(X_test, y_test)
print(f"Test Set Accuracy: {accuracy:.2f}")


Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best Parameters: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 50}
Test Set Accuracy: 1.00


In [4]:
# Print detailed results
import pandas as pd
cv_results = pd.DataFrame(grid_search.cv_results_)
cv_results[['params', 'mean_test_score', 'rank_test_score']]


Unnamed: 0,params,mean_test_score,rank_test_score
0,"{'max_depth': 3, 'min_samples_split': 2, 'n_es...",0.95,3
1,"{'max_depth': 3, 'min_samples_split': 2, 'n_es...",0.95,3
2,"{'max_depth': 3, 'min_samples_split': 2, 'n_es...",0.95,3
3,"{'max_depth': 3, 'min_samples_split': 5, 'n_es...",0.95,3
4,"{'max_depth': 3, 'min_samples_split': 5, 'n_es...",0.95,3
5,"{'max_depth': 3, 'min_samples_split': 5, 'n_es...",0.95,3
6,"{'max_depth': 3, 'min_samples_split': 10, 'n_e...",0.95,3
7,"{'max_depth': 3, 'min_samples_split': 10, 'n_e...",0.95,3
8,"{'max_depth': 3, 'min_samples_split': 10, 'n_e...",0.95,3
9,"{'max_depth': 5, 'min_samples_split': 2, 'n_es...",0.958333,1


[CV] END .max_depth=3, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END ..max_depth=3, min_samples_split=5, n_estimators=50; total time=   0.0s
[CV] END .max_depth=3, min_samples_split=5, n_estimators=100; total time=   0.1s
[CV] END .max_depth=3, min_samples_split=10, n_estimators=50; total time=   0.0s
[CV] END max_depth=3, min_samples_split=10, n_estimators=200; total time=   0.1s
[CV] END ..max_depth=5, min_samples_split=5, n_estimators=50; total time=   0.0s
[CV] END .max_depth=5, min_samples_split=5, n_estimators=100; total time=   0.1s
[CV] END .max_depth=5, min_samples_split=10, n_estimators=50; total time=   0.0s
[CV] END max_depth=5, min_samples_split=10, n_estimators=200; total time=   0.1s
[CV] END max_depth=5, min_samples_split=10, n_estimators=200; total time=   0.1s
[CV] END .max_depth=3, min_samples_split=2, n_estimators=200; total time=   0.1s
[CV] END max_depth=3, min_samples_split=10, n_estimators=200; total time=   0.1s
[CV] END .max_depth=5, min_s

In [5]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Load the Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
rf_model = RandomForestClassifier(random_state=42)

# Define the hyperparameter distribution
param_dist = {
    'n_estimators': np.arange(50, 200, 50),
    'max_depth': [3, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Perform randomized search
random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist, n_iter=10, cv=3, n_jobs=-1)
random_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", random_search.best_params_)

Best Parameters: {'n_estimators': 150, 'min_samples_split': 2, 'max_depth': 5}


In [6]:
# Print detailed results
import pandas as pd
cv_results = pd.DataFrame(random_search.cv_results_)
cv_results[['params', 'mean_test_score', 'rank_test_score']]


Unnamed: 0,params,mean_test_score,rank_test_score
0,"{'n_estimators': 100, 'min_samples_split': 10,...",0.95,2
1,"{'n_estimators': 150, 'min_samples_split': 10,...",0.95,2
2,"{'n_estimators': 50, 'min_samples_split': 2, '...",0.95,2
3,"{'n_estimators': 100, 'min_samples_split': 5, ...",0.95,2
4,"{'n_estimators': 50, 'min_samples_split': 5, '...",0.95,2
5,"{'n_estimators': 100, 'min_samples_split': 2, ...",0.95,2
6,"{'n_estimators': 100, 'min_samples_split': 5, ...",0.95,2
7,"{'n_estimators': 150, 'min_samples_split': 10,...",0.95,2
8,"{'n_estimators': 50, 'min_samples_split': 5, '...",0.95,2
9,"{'n_estimators': 150, 'min_samples_split': 2, ...",0.958333,1


In [7]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Load the Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function to minimize
def objective(trial):
    
    # Hyperparameters to tune
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 3, 10)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    
    # Define and train the model
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42)
    score = cross_val_score(model, X_train, y_train, n_jobs=-1, cv=3)
    return score.mean()

# Create a study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Best hyperparameters
print("Best Parameters:", study.best_params)

[I 2024-12-07 20:21:08,041] A new study created in memory with name: no-name-df151c49-206c-48cd-98cf-7fd052058060
[I 2024-12-07 20:21:09,518] Trial 0 finished with value: 0.9500000000000001 and parameters: {'n_estimators': 92, 'max_depth': 10, 'min_samples_split': 10}. Best is trial 0 with value: 0.9500000000000001.
[I 2024-12-07 20:21:10,036] Trial 1 finished with value: 0.9500000000000001 and parameters: {'n_estimators': 104, 'max_depth': 4, 'min_samples_split': 6}. Best is trial 0 with value: 0.9500000000000001.
[I 2024-12-07 20:21:10,546] Trial 2 finished with value: 0.9500000000000001 and parameters: {'n_estimators': 163, 'max_depth': 4, 'min_samples_split': 7}. Best is trial 0 with value: 0.9500000000000001.
[I 2024-12-07 20:21:11,042] Trial 3 finished with value: 0.9500000000000001 and parameters: {'n_estimators': 131, 'max_depth': 9, 'min_samples_split': 4}. Best is trial 0 with value: 0.9500000000000001.
[I 2024-12-07 20:21:11,130] Trial 4 finished with value: 0.95000000000000

Best Parameters: {'n_estimators': 79, 'max_depth': 8, 'min_samples_split': 2}
