In [4]:
# Import libraries:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import GridSearchCV   # Performing grid search.

import matplotlib.pylab as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

from matplotlib.pylab import rcParams
# Default figure size (width, height) applied to every plot in this notebook.
rcParams['figure.figsize'] = 12, 4

In [5]:
# Name of the label column inside the training table.
target = 'Class'

# Read the training set from disk; row 0 of the CSV supplies the column names.
train_df = pd.read_csv(filepath_or_buffer='./data/LargeTrain.csv', header=0)

In [6]:
# Feature columns: every column of the training frame except the label column.
predictors = [column for column in train_df.columns if column != target]

# Baseline hyper-parameters for the booster. max_depth, min_child_weight and
# gamma are the starting values that the grid searches further down vary.
base_params = dict(
    learning_rate=0.1,
    n_estimators=140,
    max_depth=3,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="multi:softmax",
    nthread=4,
    scale_pos_weight=1,
    seed=27,
)

# Build the (unfitted) classifier from the baseline settings.
xgb_model = XGBClassifier(**base_params)

In [None]:
# Grid search 1: tune max_depth and min_child_weight together.
# These two parameters are searched first because they are expected to have
# the largest impact on the result.
param_test1 = {
    'max_depth': range(3, 11, 2),
    'min_child_weight': range(3, 11, 2)
}

# 5-fold cross-validated search scored by accuracy.
# `iid` is intentionally not passed: the argument was removed from
# GridSearchCV in scikit-learn 0.24 (passing it raises TypeError), and current
# releases always report the plain mean over folds, i.e. the old iid=False.
# NOTE(review): xgb_model is created with nthread = 4, so n_jobs = 4 workers
# may oversubscribe the CPU — confirm against the machine's core count.
gsearch1 = GridSearchCV(
    estimator = xgb_model,
    param_grid = param_test1,
    scoring = 'accuracy',
    n_jobs = 4,
    cv = 5)

gsearch1.fit(train_df[predictors], train_df[target])

# Print the output.
# `grid_scores_` was removed in scikit-learn 0.20; build the same per-candidate
# summary (params, mean CV score, std of CV score) from `cv_results_` instead.
cv_results1 = gsearch1.cv_results_
list(zip(cv_results1['params'],
         cv_results1['mean_test_score'],
         cv_results1['std_test_score'])), gsearch1.best_params_, gsearch1.best_score_

In [None]:
# Update the xgb_model: continue from the best estimator found by the first
# grid search (it carries the selected max_depth / min_child_weight).
xgb_model = gsearch1.best_estimator_

# Grid search 2: tune gamma over 0.0, 0.1, 0.2, 0.3, 0.4.
param_test2 = {
    'gamma': [i / 10.0 for i in range(0, 5)]
}

# 5-fold cross-validated search scored by accuracy.
# `iid` is intentionally not passed: the argument was removed from
# GridSearchCV in scikit-learn 0.24 (passing it raises TypeError), and current
# releases always report the plain mean over folds, i.e. the old iid=False.
gsearch2 = GridSearchCV(
    estimator = xgb_model,
    param_grid = param_test2,
    scoring = 'accuracy',
    n_jobs = 4,
    cv = 5)

gsearch2.fit(train_df[predictors], train_df[target])

# `grid_scores_` was removed in scikit-learn 0.20; build the same per-candidate
# summary (params, mean CV score, std of CV score) from `cv_results_` instead.
cv_results2 = gsearch2.cv_results_
list(zip(cv_results2['params'],
         cv_results2['mean_test_score'],
         cv_results2['std_test_score'])), gsearch2.best_params_, gsearch2.best_score_