# Bayesian Optimazation Classification and Regression
> From now, stop using GridSearch and RandomSearch 

- toc: true 
- badges: true
- comments: true
- categories: [Bayesian]

In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import cross_val_score

import warnings
warnings.simplefilter('ignore')

# Classification Problem Bayesian Optimazation

In [12]:
X, y = make_classification(n_samples=10000, n_features=10, n_classes=2)

We using the default hyperparameter to fit the data

In [13]:
rfc = RandomForestClassifier()
np.mean(cross_val_score(rfc, X, y, cv=5, scoring='roc_auc'))

0.9897763781114314

Default hyperparameter perform aroudn `0.98` ROC_AUC, next step we use `Bayesian Optimazation` to fine turn the hyperparameter. 

## Define the `blackBox` function

In [14]:
def rfc_cv(n_estimators, min_samples_split, max_features, max_depth):
    val = np.mean(cross_val_score(RandomForestClassifier(n_estimators=int(n_estimators), 
                                                 min_samples_split=int(min_samples_split),
                                                 max_features=min(max_features, 0.999),
                                                 max_depth=int(max_depth), random_state=42),
                         X, y, scoring='roc_auc', cv=5))
    
    return val

In [15]:
# define Bayesian Optimazation
rfc_bo = BayesianOptimization(
             rfc_cv,
             {'n_estimators': (10, 250),
             'min_samples_split': (2, 25),
             'max_features': (0.1, 0.999),
             'max_depth': (5, 30)})

In [16]:
# start the optimazation
rfc_bo.maximize()

|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.9904  [0m | [0m 18.08   [0m | [0m 0.3725  [0m | [0m 2.766   [0m | [0m 249.1   [0m |
| [0m 2       [0m | [0m 0.9898  [0m | [0m 15.31   [0m | [0m 0.6883  [0m | [0m 24.87   [0m | [0m 84.16   [0m |
| [0m 3       [0m | [0m 0.99    [0m | [0m 12.87   [0m | [0m 0.416   [0m | [0m 24.62   [0m | [0m 51.27   [0m |
| [0m 4       [0m | [0m 0.9903  [0m | [0m 19.94   [0m | [0m 0.4735  [0m | [0m 5.652   [0m | [0m 244.5   [0m |
| [0m 5       [0m | [0m 0.9895  [0m | [0m 25.96   [0m | [0m 0.9587  [0m | [0m 12.6    [0m | [0m 244.6   [0m |
| [0m 6       [0m | [0m 0.9901  [0m | [0m 16.89   [0m | [0m 0.7025  [0m | [0m 2.421   [0m | [0m 244.6   [0m |
| [95m 7       [0m | [95m 0.9904  [0m | [95m 16.8    [0m | [95m 0.4017  [0m | [95m 6.605   [0m | [95m 248.0  

In [17]:
# check the best hyperparameter
rfc_bo.max

{'target': 0.9905380799798376,
 'params': {'max_depth': 6.456055231994655,
  'max_features': 0.7869473158265811,
  'min_samples_split': 22.558253615710782,
  'n_estimators': 17.814015466174588}}

In [18]:
rfc_Optimazed = RandomForestClassifier(n_estimators=18, max_depth=6, max_features=0.78, min_samples_split=22)

In [19]:
np.mean(cross_val_score(rfc_Optimazed, X, y, cv=5, scoring='roc_auc'))

0.9900614797906387

* Original `roc_auc`: 0.989776
* Optimized `roc_auc`: 0.99006

# Regression Problem Bayesian Optimazation

In [3]:
X, y = make_regression(n_samples=10000, n_features=10)

In [4]:
rfe = RandomForestRegressor()
np.mean(cross_val_score(rfe, X, y, cv=5, scoring='neg_mean_squared_error'))

-1409.2889528620326

## Define the `blackbox` function

In [5]:
def rfe_cv(n_estimators, min_samples_split, max_features, max_depth):
    val = np.mean(cross_val_score(RandomForestRegressor(n_estimators=int(n_estimators), 
                                                 min_samples_split=int(min_samples_split),
                                                 max_features=min(max_features, 0.999),
                                                 max_depth=int(max_depth), random_state=42),
                         X, y, scoring='neg_mean_squared_error', cv=5))
    
    return val

In [None]:
score = rfe_cv(n_estimators=100, min_samples_split=10, max_depth=6, max_features=0.78)
score

In [6]:
# define Bayesian Optimazation
rfe_bo = BayesianOptimization(
             rfe_cv,
             {'n_estimators': (10, 250),
             'min_samples_split': (2, 25),
             'max_features': (0.1, 0.999),
             'max_depth': (5, 30)})

In [8]:
# start the optimazation
rfe_bo.maximize()

|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m-2.702e+0[0m | [0m 22.15   [0m | [0m 0.2902  [0m | [0m 20.4    [0m | [0m 211.9   [0m |
| [0m 2       [0m | [0m-2.788e+0[0m | [0m 25.96   [0m | [0m 0.2216  [0m | [0m 22.83   [0m | [0m 166.3   [0m |
| [95m 3       [0m | [95m-1.651e+0[0m | [95m 11.15   [0m | [95m 0.8612  [0m | [95m 10.86   [0m | [95m 153.0   [0m |
| [0m 4       [0m | [0m-5.608e+0[0m | [0m 5.331   [0m | [0m 0.4747  [0m | [0m 10.79   [0m | [0m 49.97   [0m |
| [0m 5       [0m | [0m-1.862e+0[0m | [0m 12.6    [0m | [0m 0.9883  [0m | [0m 21.26   [0m | [0m 124.8   [0m |
| [0m 6       [0m | [0m-5.684e+0[0m | [0m 5.0     [0m | [0m 0.999   [0m | [0m 2.0     [0m | [0m 250.0   [0m |
| [95m 7       [0m | [95m-1.568e+0[0m | [95m 12.54   [0m | [95m 0.9491  [0m | [95m 10.55   [0m | [95m 1

In [9]:
rfe_bo.max

{'target': -1390.7849548765093,
 'params': {'max_depth': 28.70255259053527,
  'max_features': 0.8468279746142502,
  'min_samples_split': 2.1219418980976834,
  'n_estimators': 140.748505191585}}

In [10]:
# use the best hyperparameter
rfe = RandomForestRegressor(n_estimators=140, max_depth=29, max_features=0.84, min_samples_split=2)

In [11]:
np.mean(cross_val_score(rfe, X, y, cv=5, scoring='neg_mean_squared_error'))

-1383.4479089516929

* Origin `neg_mean_squared_error`: -1409.2889528620326
* Optimazed `neg_mean_squared_error`: -1383.4479089516929