[Reference](https://medium.com/bitgrit-data-science-publication/the-missing-library-in-your-machine-learning-workflow-455745f0e66b)

In [1]:
!pip install --quiet optuna

[K     |████████████████████████████████| 308 kB 15.1 MB/s 
[K     |████████████████████████████████| 209 kB 58.4 MB/s 
[K     |████████████████████████████████| 81 kB 9.5 MB/s 
[K     |████████████████████████████████| 78 kB 4.7 MB/s 
[K     |████████████████████████████████| 112 kB 62.3 MB/s 
[K     |████████████████████████████████| 147 kB 45.1 MB/s 
[K     |████████████████████████████████| 49 kB 5.0 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [2]:
import pandas as pd
import numpy as np

import optuna
from optuna.visualization import plot_param_importances

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

# Set Optuna's log verbosity to WARNING so that messages below that level
# do not clutter the cell outputs.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [3]:
def objective(trial):
  """Toy 1-D objective: sample ``x`` from [-10, 10] and return ``(x - 1) ** 2``.

  The value is smallest (zero) at ``x == 1``.
  """
  sampled = trial.suggest_float("x", -10, 10)
  distance_from_optimum = sampled - 1
  return distance_from_optimum ** 2

In [4]:
# Seed the sampler so its random draws repeat on every Restart & Run All.
# TPESampler is spelled out only to pass the seed; the objective used here is
# a deterministic function of x, so the whole study becomes repeatable.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

In [5]:
# Parameter value(s) of the study's best trial.
study.best_params

{'x': 0.9927523584883069}

In [6]:
# Objective value of the study's best trial.
study.best_value

5.25283074820171e-05

In [7]:
# print study info
def study_info(study):
  """Print a short text summary of ``study``.

  Reads ``study.trials`` (only its length is used) and ``study.best_trial``,
  then prints the trial count followed by the best trial's ``number``,
  ``value`` and every ``params`` entry, one per line. Returns ``None``.
  """
  trial_count = len(study.trials)
  best = study.best_trial

  # Build the whole report first, then emit it with a single print call.
  report = [
    f"Number of trials: {trial_count}",
    "Best trial: ",
    f"  No.: {best.number}",
    f"  Value: {best.value}",
    "  Params: ",
  ]
  report.extend(f"    {name}: {setting}" for name, setting in best.params.items())
  print(*report, sep="\n")

In [8]:
# Summarize the study: number of trials and the best trial's details.
study_info(study)

Number of trials: 100
Best trial: 
  No.: 31
  Value: 5.25283074820171e-05
  Params: 
    x: 0.9927523584883069


In [9]:
def objective(trial):
  """Toy 3-D objective: sum of squared distances of x, y, z from 1, 2, 3.

  Each coordinate is sampled from [-10, 10] in the order x, y, z; the value
  is smallest (zero) at ``(x, y, z) == (1, 2, 3)``.
  """
  optimum = (("x", 1), ("y", 2), ("z", 3))
  return sum(
    (trial.suggest_float(name, -10, 10) - target) ** 2
    for name, target in optimum
  )

# Start a fresh study for the 3-D objective. The sampler is seeded so its
# random draws repeat on every Restart & Run All; the objective is a
# deterministic function of (x, y, z), so the whole study becomes repeatable.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

In [10]:
# Best x, y, z found after the first 100 trials.
study.best_params

{'x': 0.902571381085514, 'y': 2.277880677137399, 'z': 2.6613319230939543}

In [11]:
# Resume the existing study: these 500 trials are added to the 100 already
# run, giving the 600 trials reported in the summary further down.
study.optimize(objective, n_trials=500)

In [12]:
# Summarize the study again now that more trials have been run.
study_info(study)

Number of trials: 600
Best trial: 
  No.: 569
  Value: 0.00530182793294375
  Params: 
    x: 1.0444698947840774
    y: 2.0429818996189155
    z: 2.9615706792151317


In [13]:
# Visualize how the optimization progressed across the study's trials.
optuna.visualization.plot_optimization_history(study)

In [14]:
# Slice plot of the study; presumably one panel per sampled parameter
# (x, y, z). No output was captured for this cell, so confirm by running it.
optuna.visualization.plot_slice(study)

In [15]:
# Load the wine data set and put features and class label into one frame.
wine = sklearn.datasets.load_wine()
df = pd.DataFrame(wine.data, columns=wine.feature_names).assign(target=wine.target)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [16]:
# Class balance: number of samples per target label.
df["target"].value_counts()

1    71
0    59
2    48
Name: target, dtype: int64

In [17]:
def objective(trial):
    """Return the mean 3-fold cross-validation score of a sampled classifier.

    The trial first picks the model family and then samples only the
    hyperparameters that belong to that family:

    * ``"SVC"``: ``svc_c`` (passed as ``C``), log-uniform in [1e-10, 1e10].
    * ``"RandomForest"``: ``n_estimators`` in [2, 20] and ``max_depth`` in
      [2, 32], the latter sampled on a log scale.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        Mean of the fold scores returned by ``cross_val_score`` on the wine
        data set (higher is better).
    """
    x, y = sklearn.datasets.load_wine(return_X_y=True)

    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        clf = sklearn.svm.SVC(C=c, gamma="auto")
    else:
        n_estimators = trial.suggest_int("n_estimators", 2, 20)
        max_depth = trial.suggest_int("max_depth", 2, 32, log=True)
        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            # Fixed seed: without it the same hyperparameters score differently
            # from call to call, so trials are compared on noise and the
            # reported best value cannot be reproduced.
            random_state=0,
        )

    fold_scores = sklearn.model_selection.cross_val_score(clf, x, y, n_jobs=-1, cv=3)
    return fold_scores.mean()

In [18]:
# The objective is a score where higher is better, hence direction="maximize".
# Seeding the sampler makes its random draws repeatable between runs; the
# study as a whole is repeatable only if the objective's scores are
# deterministic as well.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

study_info(study)

Number of trials: 100
Best trial: 
  No.: 22
  Value: 0.9718455743879472
  Params: 
    classifier: RandomForest
    n_estimators: 8
    max_depth: 8


In [19]:
# Visualize how the best score developed over the classifier-selection trials.
optuna.visualization.plot_optimization_history(study)

In [20]:
# Slice plot of the classifier-selection study; presumably one panel per
# sampled parameter. No output was captured here, so confirm by running it.
optuna.visualization.plot_slice(study)