In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.grid_search import GridSearchCV

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb

# Load the training set. The label lives in the `Class` column; every other
# column is treated as a feature.
train = pd.read_csv('LargeTrain.csv')
target = 'Class'
# Shift every label down by one (vectorised rather than a Python-level loop).
# Presumably this turns 1-based class ids into 0-based ones -- confirm against
# the data set description.
train[target] = train[target] - 1
# Use `!=` here: `col not in target` is a *substring* test against the string
# 'Class', so a feature column named e.g. 'C', 'ass' or 'Cl' would silently be
# excluded from the predictors.
predictors = [col for col in train.columns if col != target]

In [None]:
# Grid search 1: tune the tree depth together with the minimum number of
# samples a node must hold before it may be split (4 x 4 = 16 candidates,
# each evaluated with 5-fold cross-validation on accuracy).
param_test1 = {
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [200, 400, 600, 800],
}
gsearch1 = GridSearchCV(
    # `min_samples_split` and `max_depth` below are only placeholders: the
    # search overrides both for every candidate in `param_test1`.
    # GradientBoostingClassifier has no `seed` argument (passing one raises
    # TypeError in the constructor); `random_state` is the only seed it takes.
    estimator=GradientBoostingClassifier(
        learning_rate=0.01,
        min_samples_split=500,
        min_samples_leaf=50,
        max_depth=8,
        max_features='sqrt',
        subsample=0.8,
        random_state=10,
    ),
    param_grid=param_test1,
    scoring='accuracy',
    n_jobs=4,
    # iid=False: score a candidate by the plain mean over folds instead of
    # weighting each fold by its size (legacy scikit-learn option).
    iid=False,
    cv=5,
)
gsearch1.fit(train[predictors], train[target])
# Show per-candidate scores, the winning parameter combination and its score.
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

In [None]:
# Grid search 2: tune the number of boosting stages together with the minimum
# leaf size (6 x 6 = 36 candidates, 5-fold cross-validation on accuracy).
param_test2 = {
    'n_estimators': [100, 120, 140, 160, 180, 200],
    'min_samples_leaf': [20, 30, 40, 50, 60, 70],
}
gsearch2 = GridSearchCV(
    # `min_samples_leaf` below is only a placeholder: the search overrides it
    # (and `n_estimators`) for every candidate in `param_test2`.
    # NOTE(review): max_depth=9 is not one of the depths tried in
    # `param_test1` ([2, 4, 6, 8]) -- confirm where this value comes from.
    # GradientBoostingClassifier has no `seed` argument (passing one raises
    # TypeError in the constructor); `random_state` is the only seed it takes.
    estimator=GradientBoostingClassifier(
        learning_rate=0.01,
        min_samples_split=200,
        min_samples_leaf=50,
        max_depth=9,
        max_features='sqrt',
        subsample=0.8,
        random_state=10,
    ),
    param_grid=param_test2,
    scoring='accuracy',
    n_jobs=4,
    # iid=False: score a candidate by the plain mean over folds instead of
    # weighting each fold by its size (legacy scikit-learn option).
    iid=False,
    cv=5,
)
gsearch2.fit(train[predictors], train[target])
# Show per-candidate scores, the winning parameter combination and its score.
gsearch2.grid_scores_, gsearch2.best_params_, gsearch2.best_score_

In [None]:
# Heat map of the mean cross-validated accuracy for every
# (max_depth, min_samples_split) pair evaluated by `gsearch1`.
depth_values = gsearch1.param_grid['max_depth']
split_values = gsearch1.param_grid['min_samples_split']

# The search enumerates candidates with the parameter names sorted
# alphabetically and the last name varying fastest, so the flat score list is
# laid out as max_depth (rows) x min_samples_split (columns).
grid_visualization1 = np.array(
    [candidate.mean_validation_score for candidate in gsearch1.grid_scores_]
).reshape(len(depth_values), len(split_values))  # shape derived, not hard-coded

# Let seaborn place the tick labels itself. Setting ticks by hand with a
# reversed label list is only right on seaborn versions that flip the data;
# on versions that invert the y-axis instead, the depth labels end up
# upside down. Passing the labels here is correct on both.
ax = sb.heatmap(
    grid_visualization1,
    cmap='YlGnBu',
    xticklabels=split_values,
    yticklabels=depth_values,
)
ax.set_xlabel('min_samples_split')
ax.set_ylabel('max_depth');

In [None]:
# Heat map of the mean cross-validated accuracy for every
# (min_samples_leaf, n_estimators) pair evaluated by `gsearch2`.
leaf_values = gsearch2.param_grid['min_samples_leaf']
estimator_counts = gsearch2.param_grid['n_estimators']

# The search enumerates candidates with the parameter names sorted
# alphabetically and the last name varying fastest, so the flat score list is
# laid out as min_samples_leaf (rows) x n_estimators (columns).
grid_visualization2 = np.array(
    [candidate.mean_validation_score for candidate in gsearch2.grid_scores_]
).reshape(len(leaf_values), len(estimator_counts))  # shape derived, not hard-coded

# Let seaborn place the tick labels itself. Setting ticks by hand with a
# reversed label list is only right on seaborn versions that flip the data;
# on versions that invert the y-axis instead, the leaf-size labels end up
# upside down. Passing the labels here is correct on both.
ax = sb.heatmap(
    grid_visualization2,
    cmap='YlGnBu',
    xticklabels=estimator_counts,
    yticklabels=leaf_values,
)
ax.set_xlabel('n_estimators')
# The rows are `min_samples_leaf`; the previous label 'n_samples_leaf' was a typo.
ax.set_ylabel('min_samples_leaf');