In [119]:
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=10000, noise=0.4)

In [120]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [121]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

tree_clf = DecisionTreeClassifier(random_state=42)

grid_search = GridSearchCV(tree_clf, {"max_leaf_nodes": range(5, 50)})
grid_search.fit(X_train, y_train)
grid_search.best_estimator_

DecisionTreeClassifier(max_leaf_nodes=5, random_state=42)

In [122]:
from sklearn.metrics import accuracy_score

y_pred = grid_search.predict(X_test)
accuracy_score(y_test, y_pred)

0.8375

In [123]:
tree_clf.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 42,
 'splitter': 'best'}

#### Growing a forest

In [124]:
from sklearn.model_selection import ShuffleSplit

subsets = []

rs = ShuffleSplit(n_splits=1000, test_size=len(X_train) - 100, random_state=42)
for mini_train_index, _ in rs.split(X_train):
    X_mini_train = X_train[mini_train_index]
    y_mini_train = y_train[mini_train_index]
    subsets.append((X_mini_train, y_mini_train))

In [125]:
from sklearn.base import clone
from sklearn.metrics import accuracy_score
import numpy as np

forest = [clone(grid_search.best_estimator_) for _ in range(1000)]

accuracy_scores = []

for tree, (X_mini_train, y_mini_train) in zip(forest, subsets):
    tree.fit(X_mini_train, y_mini_train)

    y_pred = tree.predict(X_test)
    accuracy_scores.append(accuracy_score(y_test, y_pred))

np.mean(accuracy_scores)

0.811372

In [126]:
from scipy.stats import mode

Y_pred = np.empty([1000, len(X_test)], dtype=int)

for i, tree in enumerate(forest):
    Y_pred[i] = tree.predict(X_test)

y_pred_majority_votes, n_votes = mode(Y_pred, axis=0)
accuracy_score(y_test, y_pred_majority_votes.reshape([-1]))

0.839