# HyperOpt를 이용한 하이퍼파라미터 튜닝

1. 검색공간 설정
2. 대체모델을 위한 목적함수 지정
3. 최적의 파라미터 유추

In [2]:
# Display the installed hyperopt version.
# NOTE(review): no `import hyperopt` is visible in this part of the file; this
# relies on an earlier cell having imported the package - confirm.
hyperopt.__version__

'0.2.7'

In [3]:
from hyperopt import hp

# Search space: x is sampled over [-10, 10] and y over [-15, 15], both quantised
# with q=1.
search_space = {
    'x': hp.quniform('x', -10, 10, 1),
    'y': hp.quniform('y', -15, 15, 1),
}

In [4]:
from hyperopt import STATUS_OK

# Objective function
def objective_func(search_space):
    """Return the toy loss ``x**2 - 20*y`` for one sampled point.

    Args:
        search_space: Mapping holding the sampled values under the keys
            ``'x'`` and ``'y'``.

    Returns:
        The value ``x**2 - 20*y``; hyperopt's ``fmin`` minimises it.
    """
    x_val, y_val = search_space['x'], search_space['y']
    return x_val ** 2 - 20 * y_val


In [8]:
## Infer the best input values

from hyperopt import fmin, tpe, Trials
import numpy as np

# Trials collects every evaluated point together with its loss.
trial_val = Trials()

# Five TPE evaluations of the toy objective, with a fixed seed for the sampler.
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)

print('best:', best_01)

100%|██████████| 5/5 [00:00<00:00, 790.36trial/s, best loss: -224.0]
best: {'x': -4.0, 'y': 12.0}


In [9]:
# Start from a fresh Trials object so this run's history is not mixed with the
# previous 5-evaluation run.
trial_val = Trials()

# Same objective, space and seed as before, but with 20 evaluations.
best_02 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)

print('best:', best_02)

100%|██████████| 20/20 [00:00<00:00, 1726.12trial/s, best loss: -296.0]
best: {'x': 2.0, 'y': 15.0}


In [10]:
# Inspect the record of the best trial stored in trial_val (the Trials object
# filled by the 20-evaluation run above).
trial_val.best_trial

{'state': 2,
 'tid': 5,
 'spec': None,
 'result': {'loss': -296.0, 'status': 'ok'},
 'misc': {'tid': 5,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'x': [5], 'y': [5]},
  'vals': {'x': [2.0], 'y': [15.0]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2025, 1, 2, 2, 41, 21, 417000),
 'refresh_time': datetime.datetime(2025, 1, 2, 2, 41, 21, 417000)}

In [11]:
import pandas as pd

# One loss per trial, pulled out of the per-trial result dicts.
losses = [entry['loss'] for entry in trial_val.results]

# Put the sampled x / y values next to the loss each pair produced.
result_df = pd.DataFrame(
    {
        'x': trial_val.vals['x'],
        'y': trial_val.vals['y'],
        'losses': losses,
    }
)
result_df

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,2.0,15.0,-296.0
6,10.0,7.0,-40.0
7,-9.0,-10.0,281.0
8,-8.0,0.0,64.0
9,-0.0,-5.0,100.0


## XGB 하이퍼파라미터 조정

In [20]:
from hyperopt import fmin, tpe, Trials
import numpy as np
from lightgbm import  LGBMClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the breast-cancer dataset and wrap its feature matrix in a DataFrame,
# using the dataset's own feature names as column labels.
dataset = load_breast_cancer()
cancer_df = pd.DataFrame(
    data=dataset.data,
    columns=dataset.feature_names,
)
cancer_df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [22]:
X_features, y_label = dataset.data, dataset.target

# Step 1: hold out 20% of the data as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_label,
    test_size=0.2,
    random_state=156,
)

# Step 2: split 10% of the training portion off as a validation set.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=156,
)

In [23]:
from hyperopt import fmin, tpe, Trials, STATUS_OK
from sklearn.base import BaseEstimator, ClassifierMixin
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

class SklearnCompatibleXGBClassifier(XGBClassifier, BaseEstimator, ClassifierMixin):
    # Empty subclass: it adds no attributes or methods of its own and only lists
    # scikit-learn's BaseEstimator and ClassifierMixin as extra base classes
    # next to XGBClassifier.
    # NOTE(review): presumably a workaround so cross_val_score accepts the model
    # with the installed scikit-learn / xgboost versions - confirm.
    pass

def objective_func_sxgb(params):
    """Hyperopt objective built on SklearnCompatibleXGBClassifier.

    Args:
        params: Mapping with the keys ``n_estimators``, ``max_depth``,
            ``learning_rate``, ``subsample`` and ``colsample_bytree``.
            ``n_estimators`` and ``max_depth`` are cast to ``int`` here.

    Returns:
        dict with ``loss`` (mean 5-fold CV accuracy on ``X_train`` /
        ``y_train``, negated so that a lower loss means higher accuracy) and
        ``status`` set to ``STATUS_OK``.
    """
    tuned = dict(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
    )
    model = SklearnCompatibleXGBClassifier(
        random_state=42,
        eval_metric='logloss',
        **tuned,
    )

    fold_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return {'loss': -fold_scores.mean(), 'status': STATUS_OK}

In [18]:
from hyperopt import fmin, tpe, Trials, STATUS_OK
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def objective_func_xgb(params):
    """Hyperopt objective built on a plain XGBClassifier.

    A function with the same name is defined again further down in this file.

    Args:
        params: Mapping with the keys ``n_estimators``, ``max_depth``,
            ``learning_rate``, ``subsample`` and ``colsample_bytree``; the
            values are handed to XGBClassifier unchanged.

    Returns:
        dict with ``loss`` (negated mean 5-fold CV accuracy on ``X_train`` /
        ``y_train``) and ``status`` set to ``STATUS_OK``.
    """
    tuned_keys = (
        'n_estimators',
        'max_depth',
        'learning_rate',
        'subsample',
        'colsample_bytree',
    )
    model = XGBClassifier(
        random_state=42,
        eval_metric='logloss',
        **{key: params[key] for key in tuned_keys},
    )

    mean_accuracy = cross_val_score(
        model, X_train, y_train, cv=5, scoring='accuracy'
    ).mean()
    return {'loss': -mean_accuracy, 'status': STATUS_OK}

In [24]:
from hyperopt import fmin, tpe, Trials, STATUS_OK
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def objective_func_xgb(params):
    """Hyperopt objective: negated mean 5-fold CV accuracy of an XGBClassifier.

    Args:
        params: Mapping with the keys ``n_estimators``, ``max_depth``,
            ``learning_rate``, ``subsample`` and ``colsample_bytree``.
            ``n_estimators`` and ``max_depth`` may be ints or integer-valued
            floats; they are cast to ``int`` before use.

    Returns:
        dict with ``loss`` (mean accuracy over 5 folds of ``X_train`` /
        ``y_train``, negated so that a lower loss means higher accuracy) and
        ``status`` set to ``STATUS_OK``.
    """
    model = XGBClassifier(
        # Cast the integer hyperparameters, as objective_func_sxgb does. The
        # dicts returned by fmin in this file hold them as floats (e.g. 230.0,
        # 10.0), so without the cast this function only works while the search
        # space itself wraps them in scope.int.
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        random_state=42,
        eval_metric='logloss'
    )
    score_mean = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return {'loss': -1*score_mean , 'status': STATUS_OK}


In [25]:
from hyperopt.pyll.base import scope

# Hyperparameter search space for the XGBoost objectives.
search_space = {
    # Integer-valued parameters: quantised samples wrapped in scope.int.
    'n_estimators': scope.int(hp.quniform('n_estimators', 50, 300, 10)),
    'max_depth': scope.int(hp.quniform('max_depth', 3, 10, 1)),
    # Learning rate sampled log-uniformly between 0.01 and 0.3.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.3)),
    # Row / column sampling ratios sampled uniformly in [0.5, 1.0].
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
}

In [48]:
trials = Trials()

# Run 50 TPE evaluations of objective_func_sxgb over search_space.
# The sampler is seeded the same way as the earlier fmin calls in this file;
# without rstate every execution explores different points and the reported
# best loss / best parameters cannot be reproduced.
best_params1 = fmin(
    fn=objective_func_sxgb,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
    rstate=np.random.default_rng(seed=0),
    verbose=True,
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]







  2%|▏         | 1/50 [00:00<00:40,  1.22trial/s, best loss: -0.9626373626373628]









  4%|▍         | 2/50 [00:01<00:25,  1.91trial/s, best loss: -0.9626373626373628]







  6%|▌         | 3/50 [00:01<00:20,  2.30trial/s, best loss: -0.9626373626373628]








  8%|▊         | 4/50 [00:02<00:23,  1.98trial/s, best loss: -0.9626373626373628]









 10%|█         | 5/50 [00:02<00:26,  1.69trial/s, best loss: -0.9692307692307693]








 12%|█▏        | 6/50 [00:03<00:20,  2.14trial/s, best loss: -0.9692307692307693]







 14%|█▍        | 7/50 [00:03<00:23,  1.84trial/s, best loss: -0.9692307692307693]









 16%|█▌        | 8/50 [00:04<00:23,  1.78trial/s, best loss: -0.9692307692307693]






 18%|█▊        | 9/50 [00:04<00:20,  2.00trial/s, best loss: -0.9692307692307693]









 20%|██        | 10/50 [00:05<00:20,  1.93trial/s, best loss: -0.9692307692307693]









 22%|██▏       | 11/50 [00:06<00:27,  1.44trial/s, best loss: -0.9692307692307693]






 24%|██▍       | 12/50 [00:06<00:23,  1.62trial/s, best loss: -0.9692307692307693]









 26%|██▌       | 13/50 [00:07<00:19,  1.86trial/s, best loss: -0.9692307692307693]








 28%|██▊       | 14/50 [00:07<00:15,  2.32trial/s, best loss: -0.9692307692307693]








 30%|███       | 15/50 [00:07<00:12,  2.72trial/s, best loss: -0.9692307692307693]








 32%|███▏      | 16/50 [00:08<00:13,  2.44trial/s, best loss: -0.9692307692307693]









 34%|███▍      | 17/50 [00:09<00:20,  1.64trial/s, best loss: -0.9692307692307693]







 36%|███▌      | 18/50 [00:09<00:18,  1.70trial/s, best loss: -0.9692307692307693]









 38%|███▊      | 19/50 [00:10<00:16,  1.87trial/s, best loss: -0.9692307692307693]








 40%|████      | 20/50 [00:10<00:13,  2.21trial/s, best loss: -0.9692307692307693]







 42%|████▏     | 21/50 [00:10<00:14,  1.99trial/s, best loss: -0.9692307692307693]








 44%|████▍     | 22/50 [00:11<00:13,  2.10trial/s, best loss: -0.9692307692307693]








 46%|████▌     | 23/50 [00:11<00:13,  2.00trial/s, best loss: -0.9692307692307693]









 48%|████▊     | 24/50 [00:12<00:13,  1.93trial/s, best loss: -0.9692307692307693]






 50%|█████     | 25/50 [00:12<00:12,  2.07trial/s, best loss: -0.9692307692307693]









 52%|█████▏    | 26/50 [00:13<00:10,  2.21trial/s, best loss: -0.9692307692307693]









 54%|█████▍    | 27/50 [00:13<00:09,  2.32trial/s, best loss: -0.9692307692307693]






 56%|█████▌    | 28/50 [00:14<00:09,  2.31trial/s, best loss: -0.9736263736263737]









 58%|█████▊    | 29/50 [00:14<00:08,  2.38trial/s, best loss: -0.9736263736263737]









 60%|██████    | 30/50 [00:15<00:10,  1.83trial/s, best loss: -0.9736263736263737]






 62%|██████▏   | 31/50 [00:15<00:09,  2.03trial/s, best loss: -0.9736263736263737]









 64%|██████▍   | 32/50 [00:16<00:08,  2.07trial/s, best loss: -0.9736263736263737]









 66%|██████▌   | 33/50 [00:16<00:08,  1.93trial/s, best loss: -0.9736263736263737]






 68%|██████▊   | 34/50 [00:17<00:07,  2.01trial/s, best loss: -0.9736263736263737]










 70%|███████   | 35/50 [00:17<00:08,  1.80trial/s, best loss: -0.9736263736263737]






 72%|███████▏  | 36/50 [00:18<00:07,  1.92trial/s, best loss: -0.9736263736263737]









 74%|███████▍  | 37/50 [00:18<00:06,  2.07trial/s, best loss: -0.9736263736263737]









 76%|███████▌  | 38/50 [00:19<00:07,  1.66trial/s, best loss: -0.9736263736263737]







 78%|███████▊  | 39/50 [00:20<00:06,  1.68trial/s, best loss: -0.9736263736263737]









 80%|████████  | 40/50 [00:20<00:06,  1.61trial/s, best loss: -0.9736263736263737]







 82%|████████▏ | 41/50 [00:21<00:05,  1.68trial/s, best loss: -0.9736263736263737]









 84%|████████▍ | 42/50 [00:22<00:04,  1.69trial/s, best loss: -0.9736263736263737]






 86%|████████▌ | 43/50 [00:22<00:03,  1.83trial/s, best loss: -0.9736263736263737]










 88%|████████▊ | 44/50 [00:22<00:02,  2.06trial/s, best loss: -0.9736263736263737]







 90%|█████████ | 45/50 [00:23<00:02,  2.29trial/s, best loss: -0.9736263736263737]









 92%|█████████▏| 46/50 [00:23<00:01,  2.35trial/s, best loss: -0.9736263736263737]






 94%|█████████▍| 47/50 [00:23<00:01,  2.48trial/s, best loss: -0.9736263736263737]










 96%|█████████▌| 48/50 [00:25<00:01,  1.59trial/s, best loss: -0.9736263736263737]






 98%|█████████▊| 49/50 [00:25<00:00,  1.80trial/s, best loss: -0.9736263736263737]










100%|██████████| 50/50 [00:26<00:00,  1.92trial/s, best loss: -0.9736263736263737]


In [34]:
# Show the best point found for objective_func_sxgb.
# Note that max_depth and n_estimators are displayed as floats (e.g. 5.0, 120.0)
# even though the search space wraps them in scope.int; cast them to int before
# passing this dict to a model.
best_params1

{'colsample_bytree': 0.5004324015938035,
 'learning_rate': 0.05948477397856746,
 'max_depth': 5.0,
 'n_estimators': 120.0,
 'subsample': 0.5370038667202341}

In [35]:
trials = Trials()

# Run 50 TPE evaluations of objective_func_xgb over search_space, silently.
# The sampler is seeded the same way as the earlier fmin calls in this file;
# without rstate every execution explores different points and the resulting
# best parameters cannot be reproduced.
best_params = fmin(
    fn=objective_func_xgb,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
    rstate=np.random.default_rng(seed=0),
    verbose=False,
)



In [30]:
# Show the best point found for objective_func_xgb.
# Note that max_depth and n_estimators are displayed as floats (e.g. 10.0, 230.0)
# even though the search space wraps them in scope.int; cast them to int before
# passing this dict to a model.
best_params

{'colsample_bytree': 0.6724107407227828,
 'learning_rate': 0.19081440249544293,
 'max_depth': 10.0,
 'n_estimators': 230.0,
 'subsample': 0.6030166626194636}

In [31]:
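# Model evaluation based on the optimized parameters
# NOTE(review): the XGB and LGBM cells below build their models with fixed
# n_estimators=500 and learning_rate=0.05; best_params / best_params1 are not
# passed to them.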

In [47]:
# XGB
from sklearn.metrics import roc_auc_score

# Fixed settings are used here; the tuned best_params dict is not applied.
xgb_clf = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    random_state=156,
)

# Fit on the X_tr split, evaluating AUC on both X_tr and X_val.
# NOTE(review): recent XGBoost releases no longer accept eval_metric /
# early_stopping_rounds as fit() arguments - confirm the installed version.
xgb_clf.fit(
    X_tr,
    y_tr,
    eval_set=[(X_tr, y_tr), (X_val, y_val)],
    eval_metric='auc',
    early_stopping_rounds=100,
    verbose=False,
)

# ROC AUC on the held-out test set, from the positive-class probabilities.
xgb_roc_score = roc_auc_score(y_test, xgb_clf.predict_proba(X_test)[:, 1])
print(np.round(xgb_roc_score, 4))



0.9916


In [49]:
# LGBM

# Fixed settings are used here; no tuned hyperparameters are applied.
lgbm_clf = LGBMClassifier(
    n_estimators=500,
    learning_rate=0.05,
    random_state=156,
)

# Fit on the X_tr split, evaluating AUC on both X_tr and X_val and logging
# every iteration.
# NOTE(review): recent LightGBM releases no longer accept early_stopping_rounds /
# verbose as fit() arguments - confirm the installed version.
lgbm_clf.fit(
    X_tr,
    y_tr,
    eval_set=[(X_tr, y_tr), (X_val, y_val)],
    eval_metric='auc',
    early_stopping_rounds=100,
    verbose=True,
)

# ROC AUC on the held-out test set, from the positive-class probabilities.
lgbm_roc_score = roc_auc_score(y_test, lgbm_clf.predict_proba(X_test)[:, 1])
print(np.round(lgbm_roc_score, 4))




[1]	training's auc: 0.991364	training's binary_logloss: 0.625671	valid_1's auc: 0.915822	valid_1's binary_logloss: 0.628248
[2]	training's auc: 0.994944	training's binary_logloss: 0.588173	valid_1's auc: 0.935091	valid_1's binary_logloss: 0.601106
[3]	training's auc: 0.995007	training's binary_logloss: 0.554518	valid_1's auc: 0.925963	valid_1's binary_logloss: 0.577587
[4]	training's auc: 0.995159	training's binary_logloss: 0.523972	valid_1's auc: 0.920892	valid_1's binary_logloss: 0.556324
[5]	training's auc: 0.994957	training's binary_logloss: 0.49615	valid_1's auc: 0.920892	valid_1's binary_logloss: 0.537407
[6]	training's auc: 0.994982	training's binary_logloss: 0.470108	valid_1's auc: 0.939148	valid_1's binary_logloss: 0.519401
[7]	training's auc: 0.99415	training's binary_logloss: 0.446647	valid_1's auc: 0.939148	valid_1's binary_logloss: 0.502637
[8]	training's auc: 0.994125	training's binary_logloss: 0.425055	valid_1's auc: 0.939148	valid_1's binary_logloss: 0.488311
[9]	traini

In [45]:
# Print both test-set scores side by side, formatted to four decimals.
# NOTE(review): the printed label means "accuracy", but xgb_roc_score and
# lgbm_roc_score are both computed with roc_auc_score, so these are ROC AUC values.
print('xgb 정확도 : {0:.4f}, lgbm 정확도: {1:.4f}'.format(xgb_roc_score, lgbm_roc_score))

xgb 정확도 : 0.9916, lgbm 정확도: 0.9877
