# Automated Hyperparameter Tuning

### Bayesian Optimization

In [22]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [9]:
# Load the iris table from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='iris.csv')
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Train Test Split

In [10]:
# Features: every column except the label. Target: the 'species' label column.
x = df.drop(columns='species')
y = df.loc[:, 'species']

In [11]:
# Preview the feature frame.
x.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [12]:
# Preview the target series.
y.head(n=5)

0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: species, dtype: object

In [14]:
# Hold out 25% of the rows for the final evaluation; the fixed seed keeps
# the split repeatable between runs.
split_parts = train_test_split(x, y, test_size=0.25, random_state=25)
x_train, x_test, y_train, y_test = split_parts

### Apply Bayesian Optimization

In [15]:
from hyperopt import tpe,fmin,Trials,STATUS_OK,hp

In [16]:
# Domain space: the hyperparameter distributions hyperopt samples from.
# The dict keys are the names the objective reads; the first argument of each
# hp.* call is hyperopt's own label for the parameter and is the key used in
# the result returned by fmin.
# The order of the hp.choice option lists matters: the result decoding further
# down maps list positions back to these values.
# NOTE(review): 'auto' for max_features is rejected by recent scikit-learn
# releases (it meant the same as 'sqrt' for classifiers) - confirm against the
# installed version before re-running.
space = dict(
    criterion=hp.choice('criterion', ['entropy', 'gini']),
    max_depth=hp.quniform('max_depth', 10, 1200, 10),
    # Key is singular ('max_feature') because the objective looks it up under
    # that name; the hyperopt label stays 'max_features'.
    max_feature=hp.choice('max_features', ['auto', 'sqrt', 'log2', None]),
    min_samples_leaf=hp.uniform('min_samples_leaf', 0, 0.5),
    min_samples_split=hp.uniform('min_samples_split', 0, 1),
    n_estimators=hp.choice('n_estimators', [10, 50, 300, 530, 750, 1000, 1200]),
)

In [17]:
def objactive(space):
    """Hyperopt objective: score one sampled random-forest configuration.

    Parameters
    ----------
    space : dict
        A single point drawn from the search space. The keys read here are
        'criterion', 'max_depth', 'max_feature', 'min_samples_leaf',
        'min_samples_split' and 'n_estimators'.

    Returns
    -------
    dict
        ``{'loss': -accuracy, 'status': STATUS_OK}`` where ``accuracy`` is the
        mean 5-fold cross-validation score on ``x_train`` / ``y_train``.
        The score is negated because fmin minimizes the loss.
    """
    model = RandomForestClassifier(
        criterion=space['criterion'],
        # hp.quniform produces floats (e.g. 490.0), while scikit-learn defines
        # max_depth as an integer (or None); cast so the estimator accepts it.
        max_depth=int(space['max_depth']),
        max_features=space['max_feature'],
        min_samples_leaf=space['min_samples_leaf'],
        min_samples_split=space['min_samples_split'],
        n_estimators=space['n_estimators'],
    )
    accuracy = cross_val_score(model, x_train, y_train, cv=5).mean()

    return {'loss': -accuracy, 'status': STATUS_OK}

In [18]:
from sklearn.model_selection import cross_val_score

# Run 70 TPE-guided evaluations of the objective over the search space;
# the Trials object is handed to fmin to hold the evaluation history.
trials = Trials()
search_settings = dict(algo=tpe.suggest, max_evals=70, trials=trials)
best = fmin(objactive, space, **search_settings)
best

100%|███████████████████████████████████████████████| 70/70 [02:26<00:00,  2.09s/trial, best loss: -0.9644268774703558]


{'criterion': 0,
 'max_depth': 490.0,
 'max_features': 1,
 'min_samples_leaf': 0.20745321824590857,
 'min_samples_split': 0.0374930448477952,
 'n_estimators': 4}

In [19]:
# fmin reports hp.choice parameters as positions in their option lists, so
# build position -> value lookups that mirror the lists in the search space.
crit = dict(enumerate(['entropy', 'gini']))
feat = dict(enumerate(['auto', 'sqrt', 'log2', None]))
est = dict(enumerate([10, 50, 300, 530, 750, 1000, 1200]))

# Show the decoded categorical choices of the best trial.
for lookup, label in ((crit, 'criterion'), (feat, 'max_features'), (est, 'n_estimators')):
    print(lookup[best[label]])

entropy
sqrt
750


In [20]:
# Refit a forest with the best configuration found by the search and predict
# on the held-out test split.
train_rf = RandomForestClassifier(
    criterion=crit[best['criterion']],
    # fmin returns the quniform sample as a float (490.0); scikit-learn defines
    # max_depth as an integer (or None), so cast before building the model.
    max_depth=int(best['max_depth']),
    max_features=feat[best['max_features']],
    min_samples_leaf=best['min_samples_leaf'],
    min_samples_split=best['min_samples_split'],
    n_estimators=est[best['n_estimators']],
)
train_rf.fit(x_train, y_train)
pred_rf = train_rf.predict(x_test)

In [23]:
# Evaluate the tuned forest on the held-out split: confusion matrix and
# overall accuracy (each followed by a blank line), then the per-class report.
for summary in (confusion_matrix(y_test, pred_rf), accuracy_score(y_test, pred_rf)):
    print(summary, '\n')
print(classification_report(y_test, pred_rf))

[[11  0  0]
 [ 0 13  3]
 [ 0  0 11]] 

0.9210526315789473 

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      0.81      0.90        16
   virginica       0.79      1.00      0.88        11

    accuracy                           0.92        38
   macro avg       0.93      0.94      0.93        38
weighted avg       0.94      0.92      0.92        38

