In [1]:
# Record the installed hyperopt version next to the results so the run can be reproduced.
import hyperopt

print(hyperopt.__version__)

0.2.7


In [2]:
import warnings
# NOTE(review): with no category or module given, this silences every warning for the
# rest of the session, including deprecation notices from the libraries used below.
# Consider narrowing the filter once the noisy warning has been identified.
warnings.filterwarnings('ignore')

In [3]:
from hyperopt import hp

# Two-dimensional toy search space. Both axes use hp.quniform with q=1; the recorded
# trials further down show whole-number floats for x in [-10, 10] and y in [-15, 15].
search_space = dict(
    x=hp.quniform('x', -10, 10, 1),
    y=hp.quniform('y', -15, 15, 1),
)

In [4]:
# Display the space: each entry is a hyperopt pyll Apply node, not a sampled number.
search_space

{'x': <hyperopt.pyll.base.Apply at 0x27162dc4cf8>,
 'y': <hyperopt.pyll.base.Apply at 0x27162dc4e80>}

In [5]:
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Score one sampled point of the toy problem.

    Args:
        search_space: mapping with numeric entries under the keys 'x' and 'y'.

    Returns:
        dict with 'loss' set to x**2 - 20*y and 'status' set to STATUS_OK.
    """
    loss = search_space['x'] ** 2 - 20 * search_space['y']
    return dict(loss=loss, status=STATUS_OK)

In [6]:
import numpy as np
from hyperopt import fmin, tpe, Trials

# Fresh Trials object so this run's history is recorded on its own.
trial_val = Trials()

# Five TPE evaluations, driven by a generator with a fixed seed.
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)

print(f'best: {best_01}')

100%|█████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 833.16trial/s, best loss: -224.0]
best: {'x': -4.0, 'y': 12.0}


In [7]:
# Same search repeated three times without an rstate argument; the recorded output
# shows a different best point on each pass.
for i in range(3):
    trial_val = Trials()
    best_02 = fmin(
        fn=objective_func,
        space=search_space,
        algo=tpe.suggest,
        max_evals=5,
        trials=trial_val,
    )
    print(f'best: {best_02}')

100%|███████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 999.74trial/s, best loss: 20.0]
best: {'x': 0.0, 'y': -1.0}
100%|█████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 1000.17trial/s, best loss: -80.0]
best: {'x': -0.0, 'y': 4.0}
100%|█████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 999.98trial/s, best loss: -140.0]
best: {'x': -10.0, 'y': 12.0}


In [8]:
# New history container; the cells that follow inspect this particular run.
trial_val = Trials()

# Same seed as the 5-evaluation run, but with the budget raised to 20 evaluations.
best_03 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)

print(f'best: {best_03}')

100%|██████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 1052.63trial/s, best loss: -296.0]
best: {'x': 2.0, 'y': 15.0}


In [9]:
# The bare repr only identifies the Trials object; its contents are read via attributes below.
trial_val

<hyperopt.base.Trials at 0x27162def240>

In [10]:
# One dict per evaluation, holding the 'loss' and 'status' returned by objective_func.
print(trial_val.results)

[{'loss': -64.0, 'status': 'ok'}, {'loss': -184.0, 'status': 'ok'}, {'loss': 56.0, 'status': 'ok'}, {'loss': -224.0, 'status': 'ok'}, {'loss': 61.0, 'status': 'ok'}, {'loss': -296.0, 'status': 'ok'}, {'loss': -40.0, 'status': 'ok'}, {'loss': 281.0, 'status': 'ok'}, {'loss': 64.0, 'status': 'ok'}, {'loss': 100.0, 'status': 'ok'}, {'loss': 60.0, 'status': 'ok'}, {'loss': -39.0, 'status': 'ok'}, {'loss': 1.0, 'status': 'ok'}, {'loss': -164.0, 'status': 'ok'}, {'loss': 21.0, 'status': 'ok'}, {'loss': -56.0, 'status': 'ok'}, {'loss': 284.0, 'status': 'ok'}, {'loss': 176.0, 'status': 'ok'}, {'loss': -171.0, 'status': 'ok'}, {'loss': 0.0, 'status': 'ok'}]


In [11]:
# Sampled values grouped by parameter name: one list per key, one element per evaluation.
print(trial_val.vals)

{'x': [-6.0, -4.0, 4.0, -4.0, 9.0, 2.0, 10.0, -9.0, -8.0, -0.0, -0.0, 1.0, 9.0, 6.0, 9.0, 2.0, -2.0, -4.0, 7.0, -0.0], 'y': [5.0, 10.0, -2.0, 12.0, 1.0, 15.0, 7.0, -10.0, 0.0, -5.0, -3.0, 2.0, 4.0, 10.0, 3.0, 3.0, -14.0, -8.0, 11.0, -0.0]}


In [12]:
import pandas as pd

# Pull the loss out of every recorded result, in evaluation order.
losses = [trial_result['loss'] for trial_result in trial_val.results]

# Line the sampled coordinates up with their losses, one row per evaluation.
result_df = pd.DataFrame(
    dict(x=trial_val.vals['x'], y=trial_val.vals['y'], losses=losses)
)
result_df

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,2.0,15.0,-296.0
6,10.0,7.0,-40.0
7,-9.0,-10.0,281.0
8,-8.0,0.0,64.0
9,-0.0,-5.0,100.0


In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

dataset = load_breast_cancer()

# Feature columns first, with the label appended as the final 'target' column.
cancer_df = pd.DataFrame(
    data=dataset.data, columns=dataset.feature_names
).assign(target=dataset.target)
x_features = cancer_df.drop(columns='target')
y_label = cancer_df['target']

# 80/20 train/test split.
x_train, x_test, y_train, y_test = train_test_split(
    x_features,
    y_label,
    test_size=0.2,
    random_state=156,
)
# Carve a further 10% off the training part; it is used later as the early-stopping set.
x_tr, x_val, y_tr, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=156,
)

In [14]:
# Search space for the XGBoost tuning run: two stepped (q=1) parameters and two
# continuous ones.
xgb_search_space = dict(
    max_depth=hp.quniform('max_depth', 5, 20, 1),
    min_child_weight=hp.quniform('min_child_weight', 1, 2, 1),
    learning_rate=hp.uniform('learning_rate', 0.01, 0.2),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
)

In [15]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Score one sampled XGBoost configuration by 3-fold cross-validated accuracy.

    NOTE(review): this redefines the objective_func from the toy example earlier in
    the notebook; any earlier fmin cell re-run after this point will use this version.

    Reads the module-level x_train / y_train rather than taking the data as arguments.

    Args:
        search_space: mapping with the keys 'max_depth', 'min_child_weight',
            'learning_rate' and 'colsample_bytree'.

    Returns:
        dict with 'loss' set to the negated mean accuracy (fmin minimises, so higher
        accuracy means lower loss) and 'status' set to STATUS_OK.
    """
    # The two quniform parameters arrive as floats (the recorded best shows 18.0),
    # so they are cast to int before being handed to the classifier.
    xgb_clf = XGBClassifier(
        n_estimators=100,
        max_depth=int(search_space['max_depth']),
        min_child_weight=int(search_space['min_child_weight']),
        learning_rate=search_space['learning_rate'],
        colsample_bytree=search_space['colsample_bytree'],
        eval_metric='logloss',
    )
    fold_scores = cross_val_score(xgb_clf, x_train, y_train, scoring='accuracy', cv=3)

    return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

In [16]:
# Separate history for the XGBoost search.
trial_val = Trials()

# 50 TPE evaluations of the cross-validated objective, with a fixed generator seed.
best = fmin(
    fn=objective_func,
    space=xgb_search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trial_val,
    rstate=np.random.default_rng(seed=9),
)

print(f'best: {best}')

100%|███████████████████████████████████████████████| 50/50 [00:06<00:00,  7.87trial/s, best loss: -0.9692546764261647]
best: {'colsample_bytree': 0.6261779516190094, 'learning_rate': 0.1884711944918465, 'max_depth': 18.0, 'min_child_weight': 1.0}


In [17]:
# Report the winning values in the form they are fed to the final model:
# continuous parameters rounded to 5 decimals, stepped parameters as ints.
print(f"colsample_bytree: {round(best['colsample_bytree'], 5)}")
print(f"learning_rate: {round(best['learning_rate'], 5)}")
print(f"max_depth: {int(best['max_depth'])}")
print(f"min_child_weight: {int(best['min_child_weight'])}")

colsample_bytree: 0.62618
learning_rate: 0.18847
max_depth: 18
min_child_weight: 1


In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, \
                            recall_score, f1_score, roc_auc_score

def get_clf_eval(y_test, pred=None, pred_proba=None):
    """Print a binary-classification report for one set of predictions.

    Args:
        y_test: true labels.
        pred: predicted class labels. Required despite the None default, which is
            kept only so existing call sites stay valid.
        pred_proba: scores for the positive class. Optional; when omitted the AUC
            is left out of the report instead of failing inside roc_auc_score.

    Returns:
        None. The confusion matrix, accuracy, precision, recall, F1 and (when
        pred_proba is given) ROC AUC are printed.

    Raises:
        ValueError: if pred is not supplied.
    """
    # Fail with a clear message instead of an opaque error from the metric functions.
    if pred is None:
        raise ValueError('pred is required: pass the predicted class labels')

    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    # Computed before anything is printed so a failing metric produces no partial report.
    roc_auc = None if pred_proba is None else roc_auc_score(y_test, pred_proba)

    print(f'confusion matrix\n{confusion}')
    print(f'accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}')
    if roc_auc is None:
        print(f'F1: {f1:.4f}')
    else:
        print(f'F1: {f1:.4f}, AUC: {roc_auc:.4f}')

In [19]:
# Final model: the tuned values from `best`, with the tree budget raised to 400.
params = dict(
    n_estimators=400,
    max_depth=int(best['max_depth']),
    min_child_weight=int(best['min_child_weight']),
    learning_rate=round(best['learning_rate'], 5),
    colsample_bytree=round(best['colsample_bytree'], 5),
)

xgb_wrapper = XGBClassifier(**params)

# Both splits are monitored; the recorded log shows early stopping keyed on
# validation_1, i.e. the held-out (x_val, y_val) pair.
evals = [(x_tr, y_tr), (x_val, y_val)]
# NOTE(review): whether fit() accepts early_stopping_rounds / eval_metric depends on
# the installed XGBoost release — confirm against the version in use before upgrading.
xgb_wrapper.fit(
    x_tr,
    y_tr,
    early_stopping_rounds=50,
    eval_metric='logloss',
    eval_set=evals,
    verbose=True,
)

[0]	validation_0-logloss:0.539489	validation_1-logloss:0.591884
Multiple eval metrics have been passed: 'validation_1-logloss' will be used for early stopping.

Will train until validation_1-logloss hasn't improved in 50 rounds.
[1]	validation_0-logloss:0.434804	validation_1-logloss:0.534431
[2]	validation_0-logloss:0.354013	validation_1-logloss:0.467076
[3]	validation_0-logloss:0.292326	validation_1-logloss:0.419636
[4]	validation_0-logloss:0.243379	validation_1-logloss:0.396422
[5]	validation_0-logloss:0.203978	validation_1-logloss:0.366363
[6]	validation_0-logloss:0.172523	validation_1-logloss:0.343495
[7]	validation_0-logloss:0.147538	validation_1-logloss:0.328842
[8]	validation_0-logloss:0.126727	validation_1-logloss:0.313408
[9]	validation_0-logloss:0.108991	validation_1-logloss:0.294391
[10]	validation_0-logloss:0.094578	validation_1-logloss:0.283
[11]	validation_0-logloss:0.083626	validation_1-logloss:0.279206
[12]	validation_0-logloss:0.073678	validation_1-logloss:0.271597
[13

XGBClassifier(colsample_bytree=0.62618, learning_rate=0.18847, max_depth=18,
              n_estimators=400)

In [20]:
# Hard class predictions for the held-out test split.
preds = xgb_wrapper.predict(x_test)
# Second column of predict_proba, used as the score for the AUC.
pred_proba = xgb_wrapper.predict_proba(x_test)[:, 1]

get_clf_eval(y_test, pred=preds, pred_proba=pred_proba)

confusion matrix
[[33  4]
 [ 2 75]]
accuracy: 0.9474, precision: 0.9494, recall: 0.9740
F1: 0.9615, AUC: 0.9898


In [21]:
# Loss of every XGBoost trial, in evaluation order.
losses = [trial_result['loss'] for trial_result in trial_val.results]

# NOTE(review): `params` and `result_df` overwrite the earlier objects of the same
# names (the model kwargs and the toy-problem table).
params = {
    name: trial_val.vals[name]
    for name in ('max_depth', 'min_child_weight', 'colsample_bytree', 'learning_rate')
}
params['losses'] = losses

result_df = pd.DataFrame(params)

In [22]:
# Display the per-trial table: sampled hyperparameters next to the negated CV accuracy.
result_df

Unnamed: 0,max_depth,min_child_weight,colsample_bytree,learning_rate,losses
0,19.0,2.0,0.585235,0.033688,-0.94291
1,5.0,2.0,0.727186,0.105956,-0.962661
2,6.0,2.0,0.959945,0.154804,-0.960454
3,6.0,2.0,0.950012,0.120686,-0.956068
4,16.0,2.0,0.674336,0.142392,-0.960468
5,8.0,2.0,0.863774,0.106579,-0.962661
6,14.0,2.0,0.957521,0.079111,-0.958275
7,19.0,2.0,0.695018,0.095213,-0.964869
8,9.0,2.0,0.684442,0.14752,-0.962661
9,8.0,1.0,0.592116,0.081179,-0.95829
