# Search Analysis with Hyperopt - Trials

Hyperopt comes with a few plotting functions that help analyze the search. We will showcase those here, although in my opinion they are not great, and also, there is no documentation other than the source code.

We will use the trials class to store information about the search.

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split

import xgboost as xgb

In [3]:
# hp: define the hyperparameter space
# fmin: optimization function
# Trials: to evaluate the different searched hyperparameters
from hyperopt import hp, fmin, Trials

# the search algorithms
from hyperopt import rand, anneal, tpe

# for the search
from hyperopt import STATUS_OK, STATUS_FAIL

In [4]:
# load dataset

breast_cancer_X, breast_cancer_y = load_breast_cancer(return_X_y=True)
X = pd.DataFrame(breast_cancer_X)
y = pd.Series(breast_cancer_y).map({0:1, 1:0})

X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
# the target:
# percentage of benign (0) and malign tumors (1)

y.value_counts() / len(y)

0    0.627417
1    0.372583
dtype: float64

In [6]:
# split dataset into a train and test set

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

X_train.shape, X_test.shape

((398, 30), (171, 30))

### Define the Hyperparameter Space

* Hyperopt search space

* xgb.XGBClassifier hyperparameters

* xgb general parameters

In [7]:
# determine the hyperparameter space

param_grid = {
    'n_estimators': hp.quniform('n_estimators', 200, 2500, 100),
    'max_depth': hp.quniform('max_depth', 1, 10, 1),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(1)),
    'booster': hp.choice('booster', ['gbtree', 'dart']),
    'gamma': hp.loguniform('gamma', np.log(0.01), np.log(10)),
    'subsample': hp.uniform('subsample', 0.50, 0.90),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.50, 0.99),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.50, 0.99),
    'colsample_bynode': hp.uniform('colsample_bynode', 0.50, 0.99),
    'reg_lambda': hp.uniform('reg_lambda', 1, 20)
}

### Define the objective function

This is the hyperparameter response space, the function we want to minimize.

In [8]:
# the objective function takes the hyperparameter space
# as input

def objective(params):

    # we need a dictionary to indicate which value from the space
    # to attribute to each value of the hyperparameter in the xgb
    params_dict = {
        # important int, as it takes integers only
        'n_estimators': int(params['n_estimators']),
        # important int, as it takes integers only
        'max_depth': int(params['max_depth']),
        'learning_rate': params['learning_rate'],
        'booster': params['booster'],
        'gamma': params['gamma'],
        'subsample': params['subsample'],
        'colsample_bytree': params['colsample_bytree'],
        'colsample_bylevel': params['colsample_bylevel'],
        'colsample_bynode': params['colsample_bynode'],
        'random_state': 1000,
    }

    # with ** we pass the items in the dictionary as parameters
    # to the xgb
    gbm = xgb.XGBClassifier(**params_dict)

    # train with cv
    cross_val_data = cross_val_score(
        gbm, X_train, y_train,
        scoring='accuracy', cv=3, n_jobs=4,
    )
    
    
    # === IMPORTANT ===
    # data to be returned by the search, we can add as much as we want
    
    loss = -cross_val_data.mean()
    loss_variance = cross_val_data.std()
    
    try:
        return {
            'loss': loss,
            'loss_variance':loss_variance,
            'status': STATUS_OK,
            }
    except Exception as e:
        return {
            'exception': str(e),
            'status': STATUS_FAIL,
            }

### Randomized Search

* fmin: returns the best hyperparameters found during the search.

* **rand** performs randomized search

* **Trials** captures the search information

In [11]:
# fmin performs the minimization
# rand.suggest samples the parameters at random
# i.e., performs the random search

trials = Trials()

random_search = fmin(
    fn=objective,
    space=param_grid,
    max_evals=50,
    rstate=np.random.default_rng(42),
    algo=rand.suggest,  # randomized search
    trials=trials,
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  2%|▏         | 1/50 [04:50<3:57:15, 290.52s/trial, best loss: -0.9623110807321332]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




  from pandas import MultiIndex, Int64Index
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  4%|▍         | 2/50 [06:30<2:22:45, 178.44s/trial, best loss: -0.9648173463962938]

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  4%|▍         | 2/50 [1:48:44<43:29:59, 3262.50s/trial, best loss: -0.9648173463962938]


KeyboardInterrupt: 

In [None]:
# the best hyperparameters

random_search

In [None]:
# the best hyperparameters can also be found in
# trials

trials.argmin

In [None]:
# the search hyperparameter combinations

pd.DataFrame(trials.vals).head()

In [None]:
# the results

pd.DataFrame(trials.results).head()

In [None]:
results = pd.concat([
    pd.DataFrame(trials.vals),
    pd.DataFrame(trials.results)],
    axis=1,
).sort_values(by='loss', ascending=False).reset_index(drop=True)

results.head()

### Plotting with hyperopt

In [None]:
from hyperopt import plotting as hpplot

In [None]:
hpplot.main_plot_history(trials)

In [None]:
hpplot.main_plot_histogram(trials)

In [None]:
hpplot.main_plot_vars(
    trials,
    do_show=True,
    fontsize=8,
    colorize_best=True,
    columns=3,
    arrange_by_loss=False,
)

### Home made plots

In [None]:
results = pd.concat([
    pd.DataFrame(trials.vals),
    pd.DataFrame(trials.results)],
    axis=1,
).sort_values(by='loss', ascending=False).reset_index(drop=True)

results['index'] = results.index

results.head()

In [None]:
results['loss'].plot()
plt.ylabel('Accuracy')
plt.xlabel('Hyperparam combination')

In [None]:
ax = sns.lineplot(x=r'index', y='loss', data=results)
ax.fill_between(
    results["index"],
    y1=results["loss"] - results["loss_variance"],
    y2=results["loss"] + results["loss_variance"],
    alpha=.5,
)
plt.xlabel('interation')
plt.title('Random Search')

In [None]:
for var in results.columns[:-4]:
    
    tmp = results.sort_values(by=var, ascending=True)
    
    ax = sns.lineplot(x=var, y='loss', data=tmp)
    ax.fill_between(
        tmp[var],
        y1=tmp["loss"] - tmp["loss_variance"],
        y2=tmp["loss"] + tmp["loss_variance"],
        alpha=.5,
    )
    plt.xlabel(var)
    plt.title('Random Search')
    plt.show()

In [None]:
sns.pairplot(results[results.columns[:-3]], hue ="loss", palette ='coolwarm')
plt.show()

### TPE
**tpe**: performs TPE search for hyperparameters

In [None]:
# fmin performs the minimization
# tpe.suggest samples the parameters

trials_tpe = Trials()

tpe_search = fmin(
    fn=objective,
    space=param_grid,
    max_evals=50,
    rstate=np.random.RandomState(42),
    algo=tpe.suggest,  # tpe
    trials=trials_tpe
)

tpe_search

### Plotting with hyperopt

In [None]:
hpplot.main_plot_history(trials_tpe)

In [None]:
hpplot.main_plot_histogram(trials_tpe)

In [None]:
hpplot.main_plot_vars(
    trials_tpe,
    do_show=True,
    fontsize=8,
    colorize_best=True,
    columns=3,
    arrange_by_loss=False,
)

### Home made plots

In [None]:
results = pd.concat([
    pd.DataFrame(trials_tpe.vals),
    pd.DataFrame(trials_tpe.results)],
    axis=1,
).sort_values(by='loss', ascending=False).reset_index(drop=True)

results['index'] = results.index

results.head()

In [None]:
results['loss'].plot()
plt.ylabel('Accuracy')
plt.xlabel('Hyperparam combination')

In [None]:
ax = sns.lineplot(x=r'index', y='loss', data=results)
ax.fill_between(
    results["index"],
    y1=results["loss"] - results["loss_variance"],
    y2=results["loss"] + results["loss_variance"],
    alpha=.5,
)
plt.xlabel('interation')
plt.title('Random Search')

In [None]:
for var in results.columns[:-4]:
    
    tmp = results.sort_values(by=var, ascending=True)
    
    ax = sns.lineplot(x=var, y='loss', data=tmp)
    ax.fill_between(
        tmp[var],
        y1=tmp["loss"] - tmp["loss_variance"],
        y2=tmp["loss"] + tmp["loss_variance"],
        alpha=.5,
    )
    plt.xlabel(var)
    plt.title('Random Search')
    plt.show()

In [None]:
results.columns[:-2]

In [None]:
sns.pairplot(results[results.columns[:-2]], hue ="loss", palette ='coolwarm')
plt.show()