<a href="https://colab.research.google.com/github/Seungkyu-Han/colab_ml/blob/main/hyper_opt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import hyperopt

# Print the installed hyperopt version (useful when re-running this notebook).
print(hyperopt.__version__)

0.2.7


In [None]:
from hyperopt import hp

# Toy two-variable search space. hp.quniform is called with step q=1:
# 'x' ranges over [-10, 10] and 'y' over [-15, 15].
search_space = dict(
    x=hp.quniform('x', -10, 10, 1),
    y=hp.quniform('y', -15, 15, 1),
)

In [None]:
# Display the dictionary of hyperopt expressions that makes up the search space.
search_space

{'x': <hyperopt.pyll.base.Apply at 0x7fdf841db620>,
 'y': <hyperopt.pyll.base.Apply at 0x7fdf841db6e0>}

In [None]:
from hyperopt import STATUS_OK

def objective_func(search_space):
  """Toy objective: evaluate x**2 - 20*y at the sampled point.

  Args:
    search_space: mapping holding numeric values under the keys 'x' and 'y'.

  Returns:
    The plain numeric loss value (not wrapped in a dict).
  """
  x_val, y_val = search_space['x'], search_space['y']
  return x_val**2 - 20*y_val


In [None]:
from hyperopt import fmin, tpe, Trials
import numpy as np

# Trials keeps the per-evaluation results and sampled values so they can be
# inspected after the search.
trial_val = Trials()

# Minimise the toy objective with TPE for 5 evaluations. A seeded generator is
# passed as rstate so the sampled points are the same on every run, in line
# with the seeded fmin calls later in this notebook.
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
    rstate=np.random.default_rng(seed=9),
)

print(best_01)

100%|██████████| 5/5 [00:00<00:00, 425.90trial/s, best loss: -184.0]
{'x': np.float64(6.0), 'y': np.float64(11.0)}


In [None]:
import pandas as pd

# Pull the loss out of each per-trial result dictionary, in trial order.
losses = [result['loss'] for result in trial_val.results]

# One row per trial: the sampled x and y next to the loss they produced.
result_df = pd.DataFrame(
    dict(x=trial_val.vals['x'], y=trial_val.vals['y'], losses=losses)
)

print(result_df)

     x     y  losses
0  5.0 -14.0   305.0
1 -7.0   1.0    29.0
2  6.0  11.0  -184.0
3  9.0 -12.0   321.0
4 -7.0 -14.0   329.0


### HyperOpt를 XGBoost 하이퍼 파라미터 튜닝에 적용

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load scikit-learn's bundled breast cancer dataset.
dataset = load_breast_cancer()

# Build a single frame: feature columns first, the label appended last as 'target'.
cancer_df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names).assign(
    target=dataset.target
)

# Everything except the last column is a feature; the last column is the label.
X_features, y_label = cancer_df.iloc[:, :-1], cancer_df.iloc[:, -1]

# Hold out 20% as the test set, then carve 10% of the remaining training data
# off as a validation set. Both splits use the same fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_label, test_size=0.2, random_state=156
)
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=156
)

In [None]:
from hyperopt import hp

# Search space for the XGBoost hyperparameters. max_depth and min_child_weight
# use hp.quniform with step 1 and are cast to int by the objective function;
# learning_rate and colsample_bytree are sampled with hp.uniform.
xgb_search_space = dict(
    max_depth=hp.quniform('max_depth', 5, 20, 1),
    min_child_weight=hp.quniform('min_child_weight', 1, 2, 1),
    learning_rate=hp.uniform('learning_rate', 0.01, 0.2),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
)

In [None]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space, n_estimators=100):
  """Hyperopt objective: negated mean 3-fold CV accuracy of an XGBClassifier.

  Args:
    search_space: dict of sampled hyperparameters with the keys 'max_depth',
      'min_child_weight', 'learning_rate' and 'colsample_bytree'.
    n_estimators: value passed to XGBClassifier's n_estimators. Defaults to
      100, the value that used to be hard-coded, so fmin can keep calling
      this function with the sampled point alone. Bind another value with
      functools.partial to search with a different setting.

  Returns:
    dict with 'loss' (mean accuracy multiplied by -1, since fmin minimises)
    and 'status' set to STATUS_OK.
  """
  xgb_clf = XGBClassifier(
      n_estimators=n_estimators,
      # The quniform-sampled values arrive as floats; pass them on as ints.
      max_depth=int(search_space['max_depth']),
      min_child_weight=int(search_space['min_child_weight']),
      learning_rate=search_space['learning_rate'],
      colsample_bytree=search_space['colsample_bytree'],
      eval_metric='logloss',
  )

  # Cross-validation runs on the notebook-level X_train / y_train.
  accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)

  return {'loss': -1 * np.mean(accuracy), 'status': STATUS_OK}

In [None]:
from hyperopt import fmin, tpe, Trials

# Start from an empty Trials object so only this search's history is recorded.
trial_val = Trials()

# Run 50 TPE evaluations over the XGBoost space with a fixed seed.
best = fmin(
    fn=objective_func,
    space=xgb_search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trial_val,
    rstate=np.random.default_rng(seed=9),
)

100%|██████████| 50/50 [00:22<00:00,  2.22trial/s, best loss: -0.9670616939700244]


In [None]:
# Print the best hyperparameter values returned by fmin.
print(best)

{'colsample_bytree': np.float64(0.858999866296754), 'learning_rate': np.float64(0.17459146963933464), 'max_depth': np.float64(20.0), 'min_child_weight': np.float64(2.0)}


In [None]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space, n_estimators=400):
  """Hyperopt objective: negated mean 3-fold CV accuracy of an XGBClassifier.

  Args:
    search_space: dict of sampled hyperparameters with the keys 'max_depth',
      'min_child_weight', 'learning_rate' and 'colsample_bytree'.
    n_estimators: value passed to XGBClassifier's n_estimators. Defaults to
      400, the value that used to be hard-coded, so fmin can keep calling
      this function with the sampled point alone. Bind another value with
      functools.partial to search with a different setting.

  Returns:
    dict with 'loss' (mean accuracy multiplied by -1, since fmin minimises)
    and 'status' set to STATUS_OK.
  """
  xgb_clf = XGBClassifier(
      n_estimators=n_estimators,
      # The quniform-sampled values arrive as floats; pass them on as ints.
      max_depth=int(search_space['max_depth']),
      min_child_weight=int(search_space['min_child_weight']),
      learning_rate=search_space['learning_rate'],
      colsample_bytree=search_space['colsample_bytree'],
      eval_metric='logloss',
  )

  # Cross-validation runs on the notebook-level X_train / y_train.
  accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)

  return {'loss': -1 * np.mean(accuracy), 'status': STATUS_OK}

In [None]:
from hyperopt import fmin, tpe, Trials

# Start from an empty Trials object so only this search's history is recorded.
trial_val = Trials()

# Repeat the 50-evaluation TPE search with the same seed, now against the
# objective defined in the preceding cell.
best = fmin(
    fn=objective_func,
    space=xgb_search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trial_val,
    rstate=np.random.default_rng(seed=9),
)

100%|██████████| 50/50 [00:34<00:00,  1.46trial/s, best loss: -0.9692546764261647]


In [None]:
# Display the best hyperparameter values returned by the latest fmin run.
best

{'colsample_bytree': np.float64(0.7271863641855161),
 'learning_rate': np.float64(0.10595638723433032),
 'max_depth': np.float64(5.0),
 'min_child_weight': np.float64(2.0)}