In [11]:
import lightgbm
lightgbm.__version__

'3.3.2'

In [12]:
import xgboost
xgboost.__version__

'1.5.0'

In [13]:
import hyperopt
hyperopt.__version__

'0.2.7'

### HyperOpt를 이용한 하이퍼 파라미터 튜닝

1. 입력 변수명과 입력값의 검색 공간(Search Space) 설정
2. 목적 함수(Objective Function) 설정
3. 목적 함수의 반환 최솟값 가지는 최적 입력값 유추,

In [14]:
# 1. 입력 변수명과 입력값의 검색 공간(Search Space) 설정
from hyperopt import hp

# -10~10 까지 1 간격을 가지는 입력 변수 x와 -15~15까지 1 간격으로 입력 변수 y 설정
search_space = {'x':hp.quniform('x', -10,10,1),'y':hp.quniform('y',-15,15,1)}



In [15]:
# 2. 목적 함수(Objective Function) 설정
from hyperopt import STATUS_OK

def objective_func(search_space):
    x = search_space['x']
    y = search_space['y']
    retval = x**2 - 20*y

    return retval

In [16]:
# 3. 목적 함수의 반환 최솟값 가지는 최적 입력값 유추
from hyperopt import fmin, tpe, Trials
import numpy as np

trial_val = Trials()

best_01 = fmin(fn=objective_func, space=search_space, algo=tpe.suggest, max_evals=5
        ,trials=trial_val, rstate=np.random.default_rng(seed=0))
best_01

100%|██████████| 5/5 [00:00<00:00, 842.47trial/s, best loss: -224.0]


{'x': -4.0, 'y': 12.0}

In [17]:
best_02 = fmin(fn=objective_func, space=search_space, algo=tpe.suggest, max_evals=20,
    trials=trial_val, rstate=np.random.default_rng(seed=0))
best_02

100%|██████████| 20/20 [00:00<00:00, 652.11trial/s, best loss: -296.0]


{'x': 2.0, 'y': 15.0}

In [18]:
trial_val.results

[{'loss': -64.0, 'status': 'ok'},
 {'loss': -184.0, 'status': 'ok'},
 {'loss': 56.0, 'status': 'ok'},
 {'loss': -224.0, 'status': 'ok'},
 {'loss': 61.0, 'status': 'ok'},
 {'loss': -64.0, 'status': 'ok'},
 {'loss': -184.0, 'status': 'ok'},
 {'loss': 56.0, 'status': 'ok'},
 {'loss': -224.0, 'status': 'ok'},
 {'loss': 61.0, 'status': 'ok'},
 {'loss': -296.0, 'status': 'ok'},
 {'loss': -40.0, 'status': 'ok'},
 {'loss': 281.0, 'status': 'ok'},
 {'loss': 64.0, 'status': 'ok'},
 {'loss': 100.0, 'status': 'ok'},
 {'loss': 60.0, 'status': 'ok'},
 {'loss': -39.0, 'status': 'ok'},
 {'loss': 1.0, 'status': 'ok'},
 {'loss': -164.0, 'status': 'ok'},
 {'loss': 21.0, 'status': 'ok'}]

In [19]:
import pandas as pd

losses = [loss_dict['loss'] for loss_dict in trial_val.results]

result_df = pd.DataFrame({'x':trial_val.vals['x'], 'y':trial_val.vals['y'], 'losses':losses})
result_df.head(10)

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,-6.0,5.0,-64.0
6,-4.0,10.0,-184.0
7,4.0,-2.0,56.0
8,-4.0,12.0,-224.0
9,9.0,1.0,61.0


### XGBoost 하이퍼파라미터 최적화

In [51]:
from hyperopt import fmin, tpe, Trials
import numpy as np
from lightgbm import  LGBMClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

dataset = load_breast_cancer()
cancer_df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
cancer_df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [52]:
X_features =  dataset.data
y_label = dataset.target

#1단계
X_train, X_test, y_train, y_test = train_test_split(X_features, y_label, test_size=0.2, random_state=156)
#2단계
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=156)

### 목적함수

In [53]:
from hyperopt import fmin, tpe, Trials, STATUS_OK
from sklearn.base import BaseEstimator, ClassifierMixin
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

class SklearnCompatibleXGBClassifier(XGBClassifier, BaseEstimator, ClassifierMixin):
    pass

def objective_func_sxgb(params):
    
    model = SklearnCompatibleXGBClassifier(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        random_state=42,
        eval_metric='logloss'
    )
    score_mean = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return {'loss': -1*score_mean , 'status': STATUS_OK}

In [54]:
from hyperopt import fmin, tpe, Trials, STATUS_OK
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def objective_func_xgb(params):
    
    model = XGBClassifier(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        random_state=42,
        eval_metric='logloss'
    )
    score_mean = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return {'loss': -1*score_mean , 'status': STATUS_OK}

In [55]:
from hyperopt.pyll.base import scope

# 하이퍼파라미터 검색공간
search_space = { 'n_estimators': scope.int(hp.quniform('n_estimators', 50, 300, 10)), 
                'max_depth': scope.int(hp.quniform('max_depth', 3, 10, 1)),
                'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.3)),
                'subsample':hp.uniform('subsample', 0.5, 1.0),
                'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
                }


### 파라미터 유추1,2

In [57]:
trials = Trials()

best_params = fmin(
     fn=objective_func_sxgb,
     space=search_space,
     algo=tpe.suggest,
     max_evals=50,
     trials=trials
     )


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]







  0%|          | 0/50 [00:10<?, ?trial/s, best loss=?]


KeyboardInterrupt: 

In [None]:
trials = Trials()

best_params = fmin(
     fn=objective_func_xgb,
     space=search_space,
     algo=tpe.suggest,
     max_evals=50,
     trials=trials
     )

### best parameter

In [None]:
best_params

### 최적의 파라미터 적용한 모델 생성

In [None]:
best_model = XGBClassifier(n_estimators = int(best_params['n_estimators']),
                           max_depth = int(best_params['max_depth']),
                           learning_rate=best_params['learning_rate'],
                           subsample=best_params['subsample'] ,
                           colsample_bytree=best_params['colsample_bytree'],
                           random_state=42,
                           eval_metric='logloss'
                           
                           )

In [None]:
best_model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

pred = best_model.predict(X_test)
accuracy_score(y_test,pred)