In [1]:
from sklearn.datasets import make_moons
import numpy as np

data = make_moons(n_samples = 10000, noise = 0.4)

In [2]:
from sklearn.model_selection import train_test_split

data_set = np.c_[data]
train, test = train_test_split(data_set, train_size = 0.2)
x_train, y_train, x_test, y_test = train[:, :-1], train[:, -1], test[:, :-1], test[:, -1]

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

tree_clf = DecisionTreeClassifier(max_depth = 10, max_leaf_nodes = 5, random_state = 42)
tree_clf.fit(x_train, y_train)
tree_scores = cross_val_score(tree_clf, x_train, y_train, cv = 10)
tree_scores

array([0.845, 0.895, 0.865, 0.89 , 0.845, 0.82 , 0.83 , 0.835, 0.845,
       0.875])

In [4]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

parameters = [{"max_depth": randint(low = 1, high = 100),
              "max_leaf_nodes": randint(low = 2, high = 100)}]

random = RandomizedSearchCV(tree_clf, parameters, n_iter = 1000, cv = 5, scoring = "neg_root_mean_squared_error", random_state = 42)
random.fit(x_train, y_train)

In [5]:
model = random.best_estimator_
(model.predict(x_train) == y_train).sum() / len(y_train)
random.best_estimator_

In [6]:
(model.predict(x_test) == y_test).sum() / len(y_test)

0.85375

In [7]:
from sklearn.model_selection import ShuffleSplit
from sklearn.base import clone

forest = [clone(random.best_estimator_) for x in range(1000)]
splitter = ShuffleSplit(n_splits = 1000, test_size = len(x_train) - 100, random_state = 42)
sets = []

for train_index, test_index in splitter.split(x_train):
    sets.append((x_train[train_index], y_train[train_index]))

In [8]:
from sklearn.metrics import accuracy_score

accuracy = []
predictions = []

for tree, (x_set, y_set) in zip(forest, sets):
    tree.fit(x_set, y_set)
    
    accuracy.append(accuracy_score(y_test, tree.predict(x_test)))
    predictions.append([tree.predict(x_test)])

In [9]:
from scipy.stats import mode

overall_pred, count= mode(predictions, axis = 0, keepdims = True)
accuracy_score(y_test, overall_pred.reshape([-1]))

0.860375