# Tuning Hyperparameters

In [1]:
from sklearn.datasets import make_classification

X, Y = make_classification(n_samples = 200,
                          n_classes = 2,
                          n_features = 10,
                          n_redundant = 0,
                          random_state = 1)

In [2]:
X.shape, Y.shape

((200, 10), (200,))

Data split 80/20 ratio

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [4]:
X_train.shape, Y_train.shape

((160, 10), (160,))

In [5]:
X_test.shape, Y_test.shape

((40, 10), (40,))

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [8]:
rf = RandomForestClassifier(max_features = 5, n_estimators = 100)

In [9]:
rf.fit(X_train, Y_train)

RandomForestClassifier(max_features=5)

In [10]:
rf.score(X_test, Y_test)

0.875

In [11]:
Y_pred = rf.predict(X_test)

In [12]:
accuracy_score(Y_pred, Y_test)

0.875

In [13]:
Y_pred, Y_test

(array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
        0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1]),
 array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
        0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1]))

# Hyperparameter Tuning

In [14]:
from sklearn.model_selection import GridSearchCV
import numpy as np

In [17]:
max_features_range = np.arange(1, 6, 1)
n_estimators_range = np.arange(10, 210, 10)
param_grid = dict(max_features = max_features_range, n_estimators = n_estimators_range)

rf = RandomForestClassifier()

grid = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 5)

In [18]:
grid.fit(X_train, Y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'max_features': array([1, 2, 3, 4, 5]),
                         'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200])})

In [19]:
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'max_features': 1, 'n_estimators': 200} with a score of 0.90


In [20]:
import pandas as pd

grid_results = pd.concat([pd.DataFrame(grid.cv_results_["params"]),pd.DataFrame(grid.cv_results_["mean_test_score"], columns=["Accuracy"])],axis=1)
grid_results.head()

Unnamed: 0,max_features,n_estimators,Accuracy
0,1,10,0.80625
1,1,20,0.81875
2,1,30,0.7875
3,1,40,0.8625
4,1,50,0.86875


In [21]:
grid_contour = grid_results.groupby(['max_features','n_estimators']).mean()
grid_contour

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy
max_features,n_estimators,Unnamed: 2_level_1
1,10,0.80625
1,20,0.81875
1,30,0.78750
1,40,0.86250
1,50,0.86875
...,...,...
5,160,0.90000
5,170,0.88750
5,180,0.89375
5,190,0.90000


In [22]:
grid_reset = grid_contour.reset_index()
grid_reset.columns = ['max_features', 'n_estimators', 'Accuracy']
grid_pivot = grid_reset.pivot('max_features', 'n_estimators')
grid_pivot

Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy
n_estimators,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200
max_features,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
1,0.80625,0.81875,0.7875,0.8625,0.86875,0.86875,0.89375,0.86875,0.88125,0.88125,0.875,0.86875,0.85625,0.8875,0.86875,0.8875,0.88125,0.88125,0.8875,0.9
2,0.83125,0.86875,0.89375,0.85,0.86875,0.8875,0.875,0.88125,0.88125,0.875,0.86875,0.875,0.875,0.8875,0.8875,0.89375,0.88125,0.8625,0.89375,0.88125
3,0.86875,0.875,0.8625,0.8625,0.8875,0.86875,0.88125,0.875,0.86875,0.875,0.86875,0.88125,0.89375,0.875,0.8875,0.88125,0.89375,0.89375,0.89375,0.8875
4,0.86875,0.875,0.8875,0.9,0.8625,0.88125,0.875,0.89375,0.88125,0.9,0.9,0.89375,0.875,0.875,0.89375,0.89375,0.88125,0.8875,0.88125,0.9
5,0.8625,0.8625,0.86875,0.89375,0.88125,0.9,0.8875,0.8875,0.89375,0.89375,0.89375,0.8875,0.8875,0.89375,0.8875,0.9,0.8875,0.89375,0.9,0.8875
