### TUNING WITH OPTUNA

In [2]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt


from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score


import optuna

In [4]:
# Load the diabetes dataset a single time and take both the feature frame and
# the target from the same object.
diabetes = datasets.load_diabetes(as_frame=True)
X = diabetes['data']
# Convert the target column to a plain NumPy array.
y = diabetes['target'].to_numpy()


# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=23)


def objective(trials):
    """Optuna objective: cross-validated RMSE of a random forest regressor.

    Parameters
    ----------
    trials : optuna.trial.Trial
        The trial object handed in by ``study.optimize`` for one evaluation;
        used to sample the four hyperparameters below. (The plural name is
        kept so the signature stays unchanged.)

    Returns
    -------
    float
        Mean root-mean-squared error over 5 cross-validation folds fitted on
        the notebook-level ``X_train`` / ``y_train``. Lower is better.
    """
    # Search space: inclusive integer ranges for each hyperparameter.
    n_estimators = trials.suggest_int('n_estimators', 5, 1000)
    max_depth = trials.suggest_int('max_depth', 1, 100)
    min_samples_leaf = trials.suggest_int('min_samples_leaf', 1, 30)
    min_samples_split = trials.suggest_int('min_samples_split', 2, 30)

    # Seed the forest so that the same hyperparameters always yield the same
    # score; without it the objective is noisy and the study cannot be
    # reproduced even though the sampler itself is seeded.
    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,
                                  min_samples_leaf=min_samples_leaf,
                                  min_samples_split=min_samples_split,
                                  random_state=23)
    # The scorer returns negated RMSE (higher is better), so flip the sign to
    # get a quantity to minimise.
    score = -cross_val_score(estimator=model, X=X_train,
                             y=y_train, cv=5,
                             scoring='neg_root_mean_squared_error').mean()

    return score

# Minimise the objective using seeded random sampling, for 20 trials.
study = optuna.create_study(
    sampler=optuna.samplers.RandomSampler(seed=23),
    direction='minimize',
)
study.optimize(objective, n_trials=20)

[I 2024-02-23 11:57:58,149] A new study created in memory with name: no-name-900b4d7a-b9ff-44fb-8382-405ba8f0fb44


[I 2024-02-23 11:58:12,941] Trial 0 finished with value: 57.13817060849559 and parameters: {'n_estimators': 520, 'max_depth': 95, 'min_samples_leaf': 23, 'min_samples_split': 10}. Best is trial 0 with value: 57.13817060849559.
[I 2024-02-23 11:58:20,904] Trial 1 finished with value: 56.099419192521694 and parameters: {'n_estimators': 225, 'max_depth': 69, 'min_samples_leaf': 6, 'min_samples_split': 13}. Best is trial 1 with value: 56.099419192521694.
[I 2024-02-23 11:58:43,326] Trial 2 finished with value: 56.20191806882586 and parameters: {'n_estimators': 620, 'max_depth': 42, 'min_samples_leaf': 1, 'min_samples_split': 27}. Best is trial 1 with value: 56.099419192521694.
[I 2024-02-23 11:59:13,223] Trial 3 finished with value: 56.63243037343328 and parameters: {'n_estimators': 886, 'max_depth': 31, 'min_samples_leaf': 18, 'min_samples_split': 30}. Best is trial 1 with value: 56.099419192521694.
[I 2024-02-23 11:59:46,774] Trial 4 finished with value: 55.748657670083404 and parameters

In [5]:
# Display the hyperparameters of the best trial in the study.
study.best_params

{'n_estimators': 846,
 'max_depth': 7,
 'min_samples_leaf': 9,
 'min_samples_split': 10}

### Visualizing the tuning results

In [6]:
# Plot the optimization history of the study.
optuna.visualization.plot_optimization_history(study)

In [8]:
# Plot the hyperparameter importances for the study.
optuna.visualization.plot_param_importances(study)

In [9]:
# Slice plot of the study's hyperparameters.
optuna.visualization.plot_slice(study)

In [10]:
# Parallel-coordinate view across the four tuned hyperparameters.
optuna.visualization.plot_parallel_coordinate(
    study,
    params=['n_estimators', 'max_depth', 'min_samples_leaf', 'min_samples_split'],
)

In [14]:
# Rank plot over the four tuned hyperparameters, rendered at a fixed
# 1000x1000 size instead of auto-sizing.
fig = optuna.visualization.plot_rank(
    study,
    params=['n_estimators', 'max_depth', 'min_samples_leaf', 'min_samples_split'],
).update_layout(autosize=False, width=1000, height=1000)

fig.show()


plot_rank is experimental (supported from v3.2.0). The interface can change in the future.

