# <b> Lec11. Optuna </b>
* https://optuna.org/
* 하이퍼파라미터 튜닝

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------------------------------------------
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.tree import DecisionTreeClassifier
# ------------------------------------------------------

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score 
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import roc_curve, roc_auc_score
# ------------------------------------------------------
from sklearn.preprocessing import OneHotEncoder
# ------------------------------------------------------
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

import warnings
# Suppress every Python warning for the rest of the session (action="ignore"
# with no category/module filter applies to all warnings).
warnings.filterwarnings(action="ignore")

1. Wrap model training with an objective function and return the score metric
2. Suggest hyperparameters using a trial object
3. Create a study object and execute the optimization

# basic Tutorial

* optuna.trial.Trial.suggest_categorical() : 리스트 범위 내에서 값을 선택한다.
* optuna.trial.Trial.suggest_int() : 범위 내에서 정수형 값을 선택한다.
* optuna.trial.Trial.suggest_float() : 범위 내에서 소수형 값을 선택한다.
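* optuna.trial.Trial.suggest_uniform() : 범위 내에서 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — suggest_float() 사용 권장)
* optuna.trial.Trial.suggest_discrete_uniform() : 범위 내에서 이산 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — suggest_float(..., step=...) 사용 권장)
* optuna.trial.Trial.suggest_loguniform() : 범위 내에서 로그 균일분포 값을 선택한다. (Optuna 3.0부터 deprecated — suggest_float(..., log=True) 사용 권장)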

In [19]:
"""
Optuna example that optimizes a classifier configuration for Iris dataset using sklearn.

In this example, we optimize a classifier configuration for Iris dataset. Classifiers are from
scikit-learn. We optimize both the choice of classifier (among SVC and RandomForest) and their
hyperparameters.

"""

import optuna

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm
import logging

# Set Optuna's logging level to CRITICAL so lower-severity messages are not emitted.
optuna.logging.set_verbosity(logging.CRITICAL)

# Load the Iris dataset; `x` is its `data` attribute and `y` its `target` attribute.
iris = sklearn.datasets.load_iris()
x = iris.data
y = iris.target

# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def my_objective(trial):
    """Evaluate one Optuna trial and return its mean cross-validated accuracy.

    The trial first picks a classifier family ("SVC" or "RandomForest") and
    then the single hyperparameter tuned for that family:

    * SVC: ``svc_c`` sampled on a log scale from [1e-10, 1e10], with
      ``gamma="auto"``.
    * RandomForest: ``rf_max_depth`` sampled as a log-scaled integer from
      [2, 32], with ``n_estimators=10``.

    The chosen model is scored with ``cross_val_score`` (``cv=3``,
    ``scoring="accuracy"``) on the module-level ``x`` / ``y`` arrays.

    Args:
        trial: The Optuna trial object used to suggest hyperparameters.

    Returns:
        The mean of the per-fold accuracy scores.
    """
    chosen = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])

    if chosen != "SVC":
        # Any non-SVC choice falls through to the random forest branch.
        depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        estimator = sklearn.ensemble.RandomForestClassifier(
            n_estimators=10,
            max_depth=depth,
        )
    else:
        c_value = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        estimator = sklearn.svm.SVC(gamma="auto", C=c_value)

    fold_scores = cross_val_score(estimator, x, y, cv=3, scoring="accuracy")
    return fold_scores.mean()


# Executed directly at top level; the `if __name__ == "__main__":` guard is left disabled.
# Create a study whose direction is "maximize", run my_objective for 10 trials,
# then print the best trial found.
study = optuna.create_study(direction="maximize")
study.optimize(my_objective, n_trials=10)
print(study.best_trial)

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.96], datetime_start=datetime.datetime(2023, 9, 6, 11, 10, 26, 738492), datetime_complete=datetime.datetime(2023, 9, 6, 11, 10, 26, 744649), params={'classifier': 'SVC', 'svc_c': 1866.9833804266057}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'svc_c': FloatDistribution(high=10000000000.0, log=True, low=1e-10, step=None)}, trial_id=0, value=None)


In [None]:
)