# Simple example with GridSearchCV

In [7]:
# Make `/` perform true (float) division on Python 2 as well as Python 3
from __future__ import division

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

In [29]:
from sklearn.model_selection import train_test_split

# Load the iris dataset and hold out half of the samples for testing.
iris = load_iris()
Y = iris.target
X = iris.data
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state=42)

# Single-argument print(...) calls behave identically on Python 2 and Python 3,
# unlike the Python-2-only print statement.
print("{} {} {}".format(len(X), len(x_train), len(x_test)))

# Seed the tree so the fitted model (and the scores below) are reproducible.
classifier = DecisionTreeClassifier(max_depth=15, random_state=42)
classifier.fit(x_train, y_train)

# Accuracy is the fraction of predictions equal to the true labels; taking the
# mean of the boolean comparison avoids relying on integer-vs-true division.
y_pred = classifier.predict(x_train)
print("Training Results: {}".format((y_pred == y_train).mean()))

y_pred = classifier.predict(x_test)
print("Test Results: {}".format((y_pred == y_test).mean()))


150 75 75
Training Results: 1.0
Test Results: 0.933333333333


In [34]:
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

# Manual stratified 5-fold cross-validation over the full dataset.
# random_state is deliberately omitted: it only matters when shuffle=True, and
# recent scikit-learn releases raise a ValueError when it is set without
# shuffling. Leaving it out keeps the original (unshuffled) fold assignment.
skfolds = StratifiedKFold(n_splits=5)

for train_index, test_index in skfolds.split(X, Y):
    # clone() yields a fresh, unfitted estimator with the same hyper-parameters.
    clone_classifier = clone(classifier)
    X_train_folds = X[train_index]
    y_train_folds = Y[train_index]
    X_test_fold = X[test_index]
    y_test_fold = Y[test_index]

    clone_classifier.fit(X_train_folds, y_train_folds)
    y_pred = clone_classifier.predict(X_test_fold)
    # Fold accuracy: fraction of held-out samples predicted correctly.
    print((y_pred == y_test_fold).mean())

0.966666666667
0.966666666667
0.9
1.0
1.0


In [35]:
# Search for the best tree depth with 10-fold cross-validation.
iris = load_iris()
# Seed the estimator so repeated runs select the same depth.
classifier = DecisionTreeClassifier(random_state=42)

# Candidate depths 1 through 10.
param_grid = [{'max_depth': list(range(1, 11))}]

# Score with classification accuracy. Mean squared error is a regression
# metric: applied to class labels it treats them as ordered numbers and
# penalises some misclassifications more than others for no reason.
grid_search = GridSearchCV(classifier, param_grid, cv=10, scoring='accuracy')

grid_search.fit(iris.data, iris.target)
# print(...) with a single argument works on both Python 2 and Python 3.
print(grid_search.best_params_)

{'max_depth': 3}
