### HYPER-PARAMETER OPTIMIZATION

#### GRIDSEARCH

In [1]:
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold,GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier

# set up the dataset
X = load_breast_cancer(as_frame=True)['data']
y = load_breast_cancer()['target']


In [2]:
## stratified kfold
str_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

In [3]:
classfier = RandomForestClassifier(random_state=23)

# param_grid
param_grid = {
    'n_estimators': [50,60,70,80,90],
    'max_depth': [3,4,5,6],
    'criterion': ['gini','entropy']
}

model = GridSearchCV(estimator=classfier,param_grid=param_grid,
                     scoring='f1',cv = str_kfold.split(X,y),
                     verbose=3,return_train_score=True)

model.fit(X=X, y=y)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END criterion=gini, max_depth=3, n_estimators=50;, score=(train=0.983, test=0.978) total time=   0.3s
[CV 2/5] END criterion=gini, max_depth=3, n_estimators=50;, score=(train=0.984, test=0.952) total time=   0.3s
[CV 3/5] END criterion=gini, max_depth=3, n_estimators=50;, score=(train=0.984, test=0.940) total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=3, n_estimators=50;, score=(train=0.984, test=0.973) total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=3, n_estimators=50;, score=(train=0.985, test=0.972) total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=3, n_estimators=60;, score=(train=0.985, test=0.986) total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=3, n_estimators=60;, score=(train=0.984, test=0.952) total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=3, n_estimators=60;, score=(train=0.986, test=0.940) total time=   0.2s
[CV 4/5] END criterion=gini, max_depth=3, n_estima

In [4]:
# return the best score
model.best_score_

0.9711371645321135

In [5]:
model.best_params_

{'criterion': 'entropy', 'max_depth': 6, 'n_estimators': 70}

## Randomized Search

In [8]:
from sklearn.model_selection import RandomizedSearchCV


param_grid = {
    'n_estimators': [50,60,70,80,90],
    'max_depth': [3,4,5,6],
    'criterion': ['gini','entropy']
}

model = RandomizedSearchCV(estimator=classfier, param_distributions=param_grid,
                           n_iter=15, scoring='f1',return_train_score=True,
                           verbose=3, random_state=23, cv=str_kfold.split(X=X,y=y))
model.fit(X=X,y=y)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END criterion=gini, max_depth=6, n_estimators=80;, score=(train=0.995, test=0.986) total time=   0.4s
[CV 2/5] END criterion=gini, max_depth=6, n_estimators=80;, score=(train=0.998, test=0.966) total time=   0.3s
[CV 3/5] END criterion=gini, max_depth=6, n_estimators=80;, score=(train=1.000, test=0.945) total time=   0.2s
[CV 4/5] END criterion=gini, max_depth=6, n_estimators=80;, score=(train=0.997, test=0.979) total time=   0.3s
[CV 5/5] END criterion=gini, max_depth=6, n_estimators=80;, score=(train=0.998, test=0.972) total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=3, n_estimators=80;, score=(train=0.985, test=0.971) total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=3, n_estimators=80;, score=(train=0.984, test=0.959) total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=3, n_estimators=80;, score=(train=0.986, test=0.939) total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=

In [9]:
model.best_score_

0.9710400218108628

In [10]:
model.best_params_

{'n_estimators': 80, 'max_depth': 5, 'criterion': 'entropy'}

#### Bayesian Optimization with Optuna.

In [22]:
import optuna
from sklearn.model_selection import cross_val_score



# step 1: define objective function

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 20, 1000)
    max_depth = trial.suggest_int('max_depth', 2, 100)
    
    classifier = RandomForestClassifier(n_estimators=n_estimators,
                                        max_depth=max_depth, criterion='entropy',
                                        random_state=23)
    score = cross_val_score(estimator=classifier, cv = 5, scoring="f1",X=X, y=y).mean()
    
    return score

# step 2: create study
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=23), direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-06-03 14:41:35,500] A new study created in memory with name: no-name-ad176f60-e9d9-4242-9ece-7abb39ed252b
[I 2025-06-03 14:41:49,386] Trial 0 finished with value: 0.973538014470735 and parameters: {'n_estimators': 527, 'max_depth': 95}. Best is trial 0 with value: 0.973538014470735.
[I 2025-06-03 14:42:03,025] Trial 1 finished with value: 0.973538014470735 and parameters: {'n_estimators': 770, 'max_depth': 29}. Best is trial 0 with value: 0.973538014470735.
[I 2025-06-03 14:42:07,187] Trial 2 finished with value: 0.9720596406819025 and parameters: {'n_estimators': 236, 'max_depth': 69}. Best is trial 0 with value: 0.973538014470735.
[I 2025-06-03 14:42:11,038] Trial 3 finished with value: 0.972063019687948 and parameters: {'n_estimators': 183, 'max_depth': 40}. Best is trial 0 with value: 0.973538014470735.
[I 2025-06-03 14:42:22,111] Trial 4 finished with value: 0.9720596406819025 and parameters: {'n_estimators': 626, 'max_depth': 42}. Best is trial 0 with value: 0.97353801447

## visualizing the experiment.

In [24]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [25]:
optuna.visualization.plot_param_importances(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [21]:
study.best_params

{'n_estimators': 888, 'max_depth': 31, 'criterion': 'entropy'}

In [None]:
optuna.visualization.plot_parallel_coordinate(study)