https://towardsdatascience.com/improve-your-model-performance-with-bayesian-optimization-hyperparameter-tuning-4dbd7fe25b62

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from scipy.stats import randint, uniform
import seaborn as sns
import matplotlib.pyplot as plt
def parameter_over_iterations(model_result):
  '''
  Plot the hyperparameter values tried at each search iteration, plus a relative score panel.

  One bar chart is drawn per hyperparameter (x = iteration index, y = value tried at
  that iteration). A last panel titled 'overall metric' shows, for every iteration,
  the mean test score over the CV splits minus the lowest such mean, so the
  lowest-scoring iteration sits at 0.

  model_result: fitted CV search object exposing `cv_results_` with a 'params' list
    and per-split 'split<k>_test_<name>' score arrays.

  Returns None; the figure is created through `plt.subplots`.
  '''
  cv_results = model_result.cv_results_
  tried_params = cv_results['params']
  param_list = list(tried_params[0].keys())

  # One panel per hyperparameter plus one for the score, at most two panels per row.
  n_panels = len(param_list) + 1
  max_col_plot = 2
  n_cols = min(max_col_plot, n_panels)
  n_rows = int(np.ceil(n_panels / max_col_plot))
  # squeeze=False keeps `axs` a 2-D array even when there is a single panel,
  # so flatten() below always works.
  fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(30, 12), squeeze=False)
  axes = axs.flatten()

  iterations = np.arange(len(tried_params))
  for par, ax in zip(param_list, axes):
    param_val = [par_dict[par] for par_dict in tried_params]
    sns.barplot(y=param_val, x=iterations, ax=ax)
    ax.set_title(par)

  # Keep only the per-split *test* scores: 'split<k>_train_*' columns (present when
  # the search was run with return_train_score=True) must not be averaged in.
  split_scores = pd.DataFrame({key: val for key, val in cv_results.items()
                               if key.startswith('split') and '_test_' in key})
  mean_metric = split_scores.mean(axis=1)
  # Subtract the minimum instead of adding abs(min): both agree for negated-error
  # scorers (min <= 0), but only subtraction is right for non-negative scores.
  # Series.min() skips NaN, so one failed candidate does not blank the whole panel.
  relative_metric = (mean_metric - mean_metric.min()).values

  # The score panel is the first axes after the hyperparameter panels.
  metric_ax = axes[len(param_list)]
  sns.barplot(y=relative_metric, x=np.arange(len(mean_metric)), ax=metric_ax)
  metric_ax.set_title('overall metric')

## Grid Search
In this section, we will see the results of using GridSearch to select the best hyperparameters.

In [5]:
# NOTE: X_train / y_train are not defined in any cell shown here (the recorded run of
# this cell ended in NameError); a data-loading / train_test_split cell must run first.

# Two values per hyperparameter -> 2**6 = 64 candidate combinations.
param_test = {
    'max_depth': range(5, 15, 5),
    'min_samples_split': range(200, 800, 300),
    'learning_rate': np.arange(0.05, 0.55, 0.25),
    'subsample': np.arange(0.4, 1, 0.4),
    'max_features': np.arange(0.4, 1, 0.3),
    'n_estimators': np.arange(40, 160, 60),
}
# `iid` is no longer passed: it was deprecated in scikit-learn 0.22 and removed in 0.24,
# where supplying it raises TypeError. Current scikit-learn always averages fold scores
# without weighting, which is what iid=False requested.
gsearch = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=10),
    param_grid=param_test,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch.fit(X_train, y_train)
parameter_over_iterations(gsearch)

NameError: name 'X_train' is not defined

## Random Search
RandomSearch should lead to better results than GridSearch, even though it typically does not reach the global optimum of the unknown function.

In [None]:
# Distributions to sample from. scipy's randint(low, high) excludes `high`, and
# uniform(loc, scale) covers [loc, loc + scale].
param_distrib = {
    'max_depth': randint(5, 15),
    'min_samples_split': randint(200, 800),
    'learning_rate': uniform(loc=0.05, scale=0.50),
    'subsample': uniform(loc=0.4, scale=0.6),
    'max_features': uniform(loc=0.4, scale=0.6),
    'n_estimators': randint(40, 160),
}
# `iid` is no longer passed: it was removed in scikit-learn 0.24 and raises TypeError there.
# random_state seeds the candidate sampling so the 64 draws are the same on every run.
rsearch = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(random_state=10),
    param_distributions=param_distrib,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    n_iter=64,
    cv=5,
    random_state=10,
)
rsearch.fit(X_train, y_train)
parameter_over_iterations(rsearch)

## Bayesian Optimization

Now it is time to test the Bayesian optimization algorithm to tune the model.

In [None]:
# scikit-optimize is only needed for this section, so it is imported here.
from skopt import BayesSearchCV
from skopt.space import Real, Integer

# Large xi / kappa values push the acquisition function towards exploration.
optimizer_kwargs = {'acq_func_kwargs': {"xi": 10, "kappa": 10}}
# Search space; skopt dimensions include both bounds.
space = {
    'max_depth': Integer(5, 15),
    'learning_rate': Real(0.05, 0.55, "uniform"),
    'min_samples_split': Integer(200, 800),
    'subsample': Real(0.4, 1, "uniform"),
    'max_features': Real(0.4, 1, "uniform"),
    'n_estimators': Integer(40, 160),
}
# `iid` is no longer passed: it is deprecated in current scikit-optimize and the
# underlying scikit-learn search classes dropped it in 0.24.
# random_state makes the sequence of evaluated points reproducible.
bsearch = BayesSearchCV(
    estimator=GradientBoostingRegressor(random_state=10),
    search_spaces=space,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    n_iter=64,
    cv=5,
    optimizer_kwargs=optimizer_kwargs,
    random_state=10,
)
bsearch.fit(X_train, y_train)
parameter_over_iterations(bsearch)