# Automated Hyperparameter Tuning

### Genetic Algorithm

In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [2]:
df = pd.read_csv('iris.csv')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [17]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [18]:
df['species'] = df['species'].replace({'setosa':0, 'versicolor':2, 'virginica':3})

## Train Test Split

In [19]:
x = df.drop('species',axis=1)
y = df['species']

In [20]:
x.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [21]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: species, dtype: int64

In [22]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=25)

### Apply Genetic Algorithm

In [7]:
from tpot import TPOTClassifier



In [11]:
n_estimators = [int(x) for x in np.linspace(200,2000,10)]
max_features = ['auto','sqrt','log2']
max_depth = [int(x) for x in np.linspace(10,1000,10)]
min_samples_split = [1,2,3,4,5,6,7,8,9,10]
min_samples_leaf = [1,2,3,4,5,6,7,8,9,10]

In [12]:
#Create random Grid
tpot_grid = {'n_estimators':n_estimators,
              'max_features':max_features,
              'max_depth':max_depth,
              'min_samples_split':min_samples_split,
              'min_samples_leaf':min_samples_leaf,
              'criterion':['entropy','gini']
              }
print(tpot_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'criterion': ['entropy', 'gini']}


In [23]:
tpot_class = TPOTClassifier(generations=5,population_size=24,offspring_size=12,
                           verbosity=2,early_stop=12,
                           config_dict={'sklearn.ensemble.RandomForestClassifier':tpot_grid},
                           cv=4,scoring='accuracy')
tpot_class.fit(x_train,y_train)

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=84.0, style=ProgressStyle(des…


Generation 1 - Current best internal CV score: 0.9642857142857143
Generation 2 - Current best internal CV score: 0.9642857142857143
Generation 3 - Current best internal CV score: 0.9642857142857144
Generation 4 - Current best internal CV score: 0.9732142857142858
Generation 5 - Current best internal CV score: 0.9732142857142858
Best pipeline: RandomForestClassifier(RandomForestClassifier(input_matrix, criterion=entropy, max_depth=1000, max_features=log2, min_samples_leaf=5, min_samples_split=6, n_estimators=1600), criterion=gini, max_depth=450, max_features=auto, min_samples_leaf=3, min_samples_split=7, n_estimators=1000)


TPOTClassifier(config_dict={'sklearn.ensemble.RandomForestClassifier': {'criterion': ['entropy',
                                                                                      'gini'],
                                                                        'max_depth': [10,
                                                                                      120,
                                                                                      230,
                                                                                      340,
                                                                                      450,
                                                                                      560,
                                                                                      670,
                                                                                      780,
                                                                                 

In [25]:
accuracy = tpot_class.score(x_test,y_test)
print(accuracy)

0.9736842105263158
