# Hyperparameter optimization with Optuna on the Wine dataset (classification)

Data source: https://scikit-learn.org/stable/datasets/toy_dataset.html

In [None]:
! pip install --quiet optuna

[K     |████████████████████████████████| 76.1 MB 1.1 MB/s 
[?25h

In [None]:
import pandas as pd
import numpy as np

import optuna
from optuna.visualization import plot_param_importances

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

# Only show Optuna log messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

## Optuna

- An open-source hyperparameter optimization framework that automates the hyperparameter search



In [None]:
# customize the log output to only show when best values are updated
def logging_callback(study, frozen_trial):
    """Print a log line only when the study's best value has changed.

    Meant to be passed to ``study.optimize(..., callbacks=[logging_callback])``.
    The last reported best value is remembered in the study's user attribute
    ``"previous_best_value"``.

    Args:
        study: object exposing ``best_value``, ``user_attrs`` and
            ``set_user_attr``.
        frozen_trial: the trial that just finished; its ``number``, ``value``
            and ``params`` are printed.
    """
    try:
        best_value = study.best_value
    except ValueError:
        # Optuna raises ValueError while no trial has completed yet (e.g. the
        # first trials were pruned or failed). There is nothing to report, and
        # letting the error escape would abort the whole optimization.
        return

    previous_best_value = study.user_attrs.get("previous_best_value", None)
    if previous_best_value != best_value:
        study.set_user_attr("previous_best_value", best_value)
        print(
            "Trial {} finished with best value: {} and parameters: {}. ".format(
                frozen_trial.number,
                frozen_trial.value,
                frozen_trial.params,
            )
        )

In [None]:
# print study info
def study_info(study):
    """Print the number of trials in the study and a summary of its best trial.

    The summary lists the best trial's number, its objective value and every
    parameter name/value pair stored on it.
    """
    trial_count = len(study.trials)
    best = study.best_trial

    print(f"Number of trials: {trial_count}")
    print("Best trial: ")
    for label, field in (("No.", best.number), ("Value", best.value)):
        print(f"  {label}: {field}")

    print("  Params: ")
    for name, setting in best.params.items():
        print(f"    {name}: {setting}")

In [None]:
def objective(trial):
    """Quadratic toy objective whose minimum (0) lies at x = 1.

    Asks the trial for a float ``x`` in [-10, 10] and returns ``(x - 1) ** 2``.
    """
    candidate = trial.suggest_float("x", -10, 10)
    offset = candidate - 1
    return offset ** 2

In [None]:
# Seed the sampler so that a fresh "Restart & Run All" proposes the same trials.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100, callbacks=[logging_callback])

Trial 0 finished with best value: 0.28501019083587587 and parameters: {'x': 1.5338634571085343}. 
Trial 1 finished with best value: 0.03894285911347894 and parameters: {'x': 0.802660548512268}. 
Trial 33 finished with best value: 0.0010347011566727482 and parameters: {'x': 0.9678332289983476}. 
Trial 49 finished with best value: 0.0004660814488122324 and parameters: {'x': 0.978411080415819}. 
Trial 51 finished with best value: 3.213952149612913e-11 and parameters: {'x': 1.0000056691729111}. 


In [None]:
study.best_params

{'x': 1.0000056691729111}

In [None]:
study.best_value

3.213952149612913e-11

In [None]:
study_info(study)

Number of finished trials: 100
Best trial : 
  Number: 51
  Value: 3.213952149612913e-11
  Params: 
    x: 1.0000056691729111


In [None]:
def objective(trial):
    """Three-parameter toy objective: squared distance from the point (1, 2, 3).

    Samples ``x``, ``y`` and ``z`` (in that order) as floats in [-10, 10] and
    returns ``(x - 1)**2 + (y - 2)**2 + (z - 3)**2``.
    """
    samples = [trial.suggest_float(name, -10, 10) for name in ("x", "y", "z")]
    # Plain left-to-right additions keep the float rounding of the sum unchanged.
    dx2, dy2, dz2 = [(value - center) ** 2 for value, center in zip(samples, (1, 2, 3))]
    return dx2 + dy2 + dz2

# New study for the 3-parameter objective; the sampler is seeded so re-runs propose the same trials.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100, callbacks=[logging_callback])

Trial 0 finished with best value: 185.36401921699712 and parameters: {'x': -8.29537193039899, 'y': -7.9444851162075185, 'z': 3.2594140822600064}. 
Trial 1 finished with best value: 159.83255179442162 and parameters: {'x': 6.919588162035236, 'y': -6.57704125596668, 'z': -4.157191563718232}. 
Trial 2 finished with best value: 7.626764123650401 and parameters: {'x': -1.3053998639717879, 'y': 2.978717173954813, 'z': 4.1636186163237365}. 
Trial 21 finished with best value: 4.082878261206643 and parameters: {'x': -0.9926841299551272, 'y': 1.665213499245479, 'z': 3.0024936607957393}. 
Trial 29 finished with best value: 3.6726776120838824 and parameters: {'x': 2.0038973632986297, 'y': 0.3787112861454629, 'z': 3.190500924863483}. 
Trial 32 finished with best value: 1.6660402994238952 and parameters: {'x': 1.9779070538930343, 'y': 2.1608518645468258, 'z': 3.8269611665864067}. 
Trial 71 finished with best value: 0.5027736619471901 and parameters: {'x': 1.5627642695646873, 'y': 2.2084263728124554,

In [None]:
study.best_params

{'x': 1.5627642695646873, 'y': 2.2084263728124554, 'z': 3.3776618672367618}

In [None]:
# Continue the same study: these 500 trials are added on top of the ones already run.
study.optimize(objective, n_trials=500, callbacks=[logging_callback])

Trial 112 finished with best value: 0.2838186411163353 and parameters: {'x': 1.3821926696075142, 'y': 2.303204599867226, 'z': 2.785957071983277}. 
Trial 113 finished with best value: 0.2636754457290153 and parameters: {'x': 1.4633972103468333, 'y': 2.112157177799116, 'z': 2.8093190134283823}. 
Trial 114 finished with best value: 0.13125427164753062 and parameters: {'x': 1.213208423245971, 'y': 2.028933781114155, 'z': 2.7085222543401715}. 
Trial 142 finished with best value: 0.09764704185839798 and parameters: {'x': 1.109293241869957, 'y': 1.900423125463834, 'z': 3.2752934347156897}. 
Trial 143 finished with best value: 0.023384939524816253 and parameters: {'x': 1.1079473428591624, 'y': 1.9704852367666095, 'z': 3.104217030497609}. 
Trial 381 finished with best value: 0.02105327177210695 and parameters: {'x': 0.8966563295800967, 'y': 1.9386566935358678, 'z': 2.918695902266264}. 


In [None]:
study.best_params

{'x': 0.8966563295800967, 'y': 1.9386566935358678, 'z': 2.918695902266264}

In [None]:
study_info(study)

Number of finished trials: 600
Best trial : 
  Number: 381
  Value: 0.02105327177210695
  Params: 
    x: 0.8966563295800967
    y: 1.9386566935358678
    z: 2.918695902266264


In [None]:
optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_slice(study)

## Optuna with sklearn

In [None]:
# Put the Wine features and the class label ("target") into a single DataFrame.
wine = sklearn.datasets.load_wine()
df = pd.DataFrame(wine.data, columns=wine.feature_names).assign(target=wine.target)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [None]:
df["target"].value_counts()

1    71
0    59
2    48
Name: target, dtype: int64

In [None]:
def objective(trial, random_state=42):
    """Cross-validated accuracy of a classifier chosen and tuned by the trial.

    The trial first picks the model family ("SVC" or "RandomForest") and then
    samples only the hyperparameters of that family, so the search space is
    conditional.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        random_state: seed passed to the random forest so that identical
            hyperparameters always yield the same score. Optuna calls the
            objective with ``trial`` only, so the default applies in a study.

    Returns:
        Mean score of a 3-fold cross-validation on the Wine dataset.
    """
    x, y = sklearn.datasets.load_wine(return_X_y=True)

    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        # Regularization strength is searched on a log scale.
        c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        clf = sklearn.svm.SVC(C=c, gamma="auto")
    else:
        n_estimators = trial.suggest_int("n_estimators", 2, 20)
        max_depth = trial.suggest_int("max_depth", 2, 32, log=True)
        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            # Without a fixed seed the forest adds noise to the objective.
            random_state=random_state,
        )

    scores = sklearn.model_selection.cross_val_score(clf, x, y, n_jobs=-1, cv=3)
    return scores.mean()

In [None]:
# Accuracy is maximized. The sampler is seeded so the search itself adds no run-to-run randomness.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

study_info(study)

Number of trials: 100
Best trial: 
  No.: 57
  Value: 0.9663841807909604
  Params: 
    classifier: RandomForest
    n_estimators: 20
    max_depth: 24


In [None]:
optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_slice(study)

In [None]:
plot_param_importances(study)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a605a3e6-1564-47b2-94e7-842290ba7692' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>