### Hyperparameter Optimization

1. GridsearchCV
2. RandomizedSearch
3. Optuna
4. HyperOpt


In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error


from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

from sklearn import datasets

In [13]:
data = datasets.load_diabetes(as_frame=True)['data']
y = datasets.load_diabetes()['target']

In [None]:
reg = RandomForestRegressor(random_state= 42, n_jobs= -1)

parameters = {
    
    'n_estimators': list(range(10,60,10)),
    'max_depth': list(range(2,12,2)),
    'criterion': ['squared_error','absolute_error'],
    'max_features': ['sqrt', 'log2']
}

model = GridSearchCV(estimator=reg, param_grid=parameters,
                     n_jobs= 1, cv=5, verbose=10, return_train_score= True,
                     scoring= 'neg_root_mean_squared_error')

model.fit(X = data, y = y)

In [20]:
model.best_params_

{'criterion': 'squared_error',
 'max_depth': 4,
 'max_features': 'sqrt',
 'n_estimators': 50}

In [21]:
model.best_score_

-56.41173460834938

In [22]:
model.best_estimator_

In [23]:
model.best_index_

14

### Classification

In [None]:
data = datasets.load_breast_cancer(as_frame=True)['data']
y = datasets.load_breast_cancer()['target']


classifier = RandomForestClassifier(random_state= 2, n_jobs= 1)


param_grid = {
    'n_estimators': list(range(10,60,10)),
    'max_depth': list(range(2,12, 2)),
    'criterion' : ['gini', 'entropy']
}

model_classifier = GridSearchCV(estimator= classifier, param_grid=param_grid,
                                scoring='f1',n_jobs=1, verbose = 10,
                                cv = 5, return_train_score=True
                                )
model_classifier.fit(data, y)

### Randomized Search

In [15]:
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

# loading the dataset
X = datasets.load_breast_cancer(as_frame=True)['data']
y = datasets.load_breast_cancer()['target']

# setting up the cross validation
s_fold = StratifiedKFold(n_splits= 10, shuffle= True, random_state= 30)

# setting up the model
xgb_classifier = XGBClassifier(random_state = 23, n_jobs = 1)


# setting up the search

parameters = {
    'n_estimators': list(range(10,2000,10)),
    'learning_rate': [x/100 for x in range(10, 95, 5)],
    'max_depth' : list(range(2,50,2)),
    'max_delta_step': list(range(0,50,1)),
    'subsample': [0,1]
}

# training and tuning the model

model = RandomizedSearchCV(estimator= xgb_classifier, param_distributions= parameters,
                           n_iter=40, scoring='f1', n_jobs=1, cv = s_fold.split(X,y),
                           verbose= 3, random_state= 32, return_train_score= True)

model.fit(X, y)

best_model = model.best_estimator_
best_score = model.best_score_
best_parameters = model.best_params_

for i in [best_score,best_parameters]:
    print(i)

Fitting 10 folds for each of 40 candidates, totalling 400 fits
[CV 1/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.986) total time=   0.9s
[CV 2/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.986) total time=   0.9s
[CV 3/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.986) total time=   0.4s
[CV 4/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.973) total time=   0.3s
[CV 5/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.986) total time=   0.4s
[CV 6/10] END learning_rate=0.75, max_delta_step=9, max_depth=40, n_estimators=470, subsample=1;, score=(train=1.000, test=0.960) total time=   0.4s
[CV 7/10] END learning_rate=0.75, max_delta

In [18]:
### RANDOMIZED SEARCH FOR REGRESSION MODELS


#loading  theh datasets
reg_X = datasets.load_diabetes(as_frame=True)['data']
reg_y = datasets.load_diabetes()['target']

#instantiating the model
reg_xgb = XGBRegressor(random_state = 23, n_jobs = 1)

# setting up cross validation
fold = StratifiedKFold(n_splits=6, shuffle= True, random_state=32)

params = {
     'n_estimators': list(range(10,2000,10)),
    'learning_rate': [x/100 for x in range(10, 95, 5)],
    'max_depth' : list(range(2,50,2)),
    'colsample_bytree': [0,1],
    'subsample': [0,1],
    'colsample_bynode': [0,1],
    'lambda': list(range(1,10,1))
}


# setting up the model
reg_model = RandomizedSearchCV(estimator= reg_xgb, param_distributions= params,
                               n_iter= 10, scoring='neg_root_mean_squared_error',
                               n_jobs=1, cv = fold.split(reg_X, reg_y), verbose=3,
                               random_state=32, return_train_score= True)

reg_model.fit(reg_X, reg_y)

best_rmse = reg_model.best_score_
best_params = reg_model.best_params_
best_reg_model = reg_model.best_estimator_

for i in [best_rmse, best_params]:
    print(i)



Fitting 6 folds for each of 10 candidates, totalling 60 fits
[CV 1/6] END colsample_bynode=1, colsample_bytree=1, lambda=2, learning_rate=0.15, max_depth=42, n_estimators=1300, subsample=1;, score=(train=-0.001, test=-64.362) total time=   4.0s
[CV 2/6] END colsample_bynode=1, colsample_bytree=1, lambda=2, learning_rate=0.15, max_depth=42, n_estimators=1300, subsample=1;, score=(train=-0.001, test=-62.894) total time=   2.6s
[CV 3/6] END colsample_bynode=1, colsample_bytree=1, lambda=2, learning_rate=0.15, max_depth=42, n_estimators=1300, subsample=1;, score=(train=-0.001, test=-66.880) total time=   2.6s
[CV 4/6] END colsample_bynode=1, colsample_bytree=1, lambda=2, learning_rate=0.15, max_depth=42, n_estimators=1300, subsample=1;, score=(train=-0.001, test=-64.575) total time=   2.5s
[CV 5/6] END colsample_bynode=1, colsample_bytree=1, lambda=2, learning_rate=0.15, max_depth=42, n_estimators=1300, subsample=1;, score=(train=-0.001, test=-58.584) total time=   3.5s
[CV 6/6] END colsam

[0.1,
 0.15,
 0.2,
 0.25,
 0.3,
 0.35,
 0.4,
 0.45,
 0.5,
 0.55,
 0.6,
 0.65,
 0.7,
 0.75,
 0.8,
 0.85,
 0.9]

In [8]:
nums

[0.1,
 0.15,
 0.2,
 0.25,
 0.3,
 0.35,
 0.4,
 0.45,
 0.5,
 0.55,
 0.6,
 0.65,
 0.7,
 0.75,
 0.8,
 0.85,
 0.9]