In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Global plot styling: seaborn theme first, then the palette, then the
# matplotlib overrides (the theme call comes first so it cannot undo them).
sns.set_theme(font_scale=1, style='whitegrid')
sns.set_palette(palette='Set2', n_colors=10)
# NOTE(review): 'AppleGothic' is presumably chosen so Korean labels render;
# confirm the font exists on every machine that runs this notebook.
plt.rcParams['font.family'] = 'AppleGothic'
# Disable the Unicode minus glyph for negative tick labels.
plt.rcParams['axes.unicode_minus'] = False

# Load the raw table and keep only the rows whose target ('음주운전') is known.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle('data/acc_raw.pkl')
target_known = df['음주운전'].notnull()
df = df[target_known]

# Separate the target column from the feature columns.
x_train = df.drop(columns='음주운전')
y_train = df['음주운전']

# Column names grouped by dtype; the target is removed from the numeric group.
cat_list = list(df.select_dtypes(include=['object']).columns)
num_list = list(
    df.select_dtypes(include=['number']).drop(columns='음주운전').columns
)

# Missing values in object columns are replaced by the literal category 'UNK'.
filled_categoricals = x_train[cat_list].fillna('UNK')
x_train[cat_list] = filled_categoricals

In [2]:
from catboost import CatBoostClassifier
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split
import optuna


In [3]:
def objective(trial):
    """Optuna objective: fit one CatBoost model and score it by average precision.

    Reads the notebook-level ``x_train``, ``y_train`` and ``cat_list``. The data
    is split 65/35 with a fixed ``random_state`` so every trial sees the same
    partition.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        Average precision of the second ``predict_proba`` column on the
        held-out 35% split.
    """
    fit_features, eval_features, fit_target, eval_target = train_test_split(
        x_train, y_train, test_size=0.35, random_state=1111
    )

    # Keyword arguments are evaluated left to right, so the suggestions are
    # requested from the trial in this exact order.
    param = dict(
        learning_rate=trial.suggest_float("learning_rate", 0.001, 0.5),
        colsample_bylevel=trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        depth=trial.suggest_int("depth", 1, 12),
        boosting_type=trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        bootstrap_type=trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
    )

    # Some bootstrap types get one extra float parameter; "MVS" gets none.
    conditional_float = {
        "Bayesian": ("bagging_temperature", 0, 10),
        "Bernoulli": ("subsample", 0.1, 1),
    }
    extra = conditional_float.get(param["bootstrap_type"])
    if extra is not None:
        extra_name, low, high = extra
        param[extra_name] = trial.suggest_float(extra_name, low, high)

    model = CatBoostClassifier(**param)

    # NOTE(review): the held-out split drives early stopping and is also the
    # set that is scored below, so the returned value may be optimistic.
    model.fit(
        fit_features,
        fit_target,
        eval_set=[(eval_features, eval_target)],
        cat_features=cat_list,
        verbose=0,
        early_stopping_rounds=100,
    )

    positive_proba = model.predict_proba(eval_features)[:, 1]
    return average_precision_score(eval_target, positive_proba)

In [4]:

# Seed the sampler so the hyperparameter search is repeatable on a fresh
# kernel. TPESampler is Optuna's default single-objective sampler, so only the
# seeding changes, not the search algorithm.
SAMPLER_SEED = 1111
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)
# The run stops after 10 trials or 600 seconds, whichever comes first; results
# are only repeatable when the timeout does not cut the run short.
study.optimize(objective, n_trials=10, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2023-04-22 15:25:06,377][0m A new study created in memory with name: no-name-9ba9739f-7c95-4de3-ab17-a94ed7dc1b53[0m
[32m[I 2023-04-22 15:25:19,719][0m Trial 0 finished with value: 0.4857114642072371 and parameters: {'learning_rate': 0.021638482693544184, 'colsample_bylevel': 0.0467770960142275, 'depth': 9, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.5682996167268485}. Best is trial 0 with value: 0.4857114642072371.[0m
[32m[I 2023-04-22 15:25:28,722][0m Trial 1 finished with value: 0.48260801424246796 and parameters: {'learning_rate': 0.2270893298633922, 'colsample_bylevel': 0.04121237595554739, 'depth': 4, 'boosting_type': 'Plain', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 7.096642028801309}. Best is trial 0 with value: 0.4857114642072371.[0m
[32m[I 2023-04-22 15:25:38,738][0m Trial 2 finished with value: 0.4552423895444777 and parameters: {'learning_rate': 0.0945433797074177, 'colsample_bylevel': 0.05035194445472151, 'depth': 2

Number of finished trials: 10
Best trial:
  Value: 0.5002596463056314
  Params: 
    learning_rate: 0.22284374993634803
    colsample_bylevel: 0.09253654613972763
    depth: 9
    boosting_type: Ordered
    bootstrap_type: MVS


# 2. Visualization
## 2.1. Optimization history

In [5]:
# Build the optimization-history figure for `study`; the bare name on the last
# line makes the notebook render it.
history_fig = optuna.visualization.plot_optimization_history(study)
history_fig

## 2.2. Slice Plot

In [6]:
# Build the slice plot for `study`; the bare name on the last line makes the
# notebook render it.
slice_fig = optuna.visualization.plot_slice(study)
slice_fig

## 2.3. Contour Plot


In [7]:
# Build the contour plot for `study`; the bare name on the last line makes the
# notebook render it.
contour_fig = optuna.visualization.plot_contour(study)
contour_fig