In [None]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_moons


import os
# Kaggle boilerplate: walk the mounted input directory and print the full path
# of every file found under it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)



In [None]:
# Generate the noisy two-moons dataset. The seed is fixed so that a fresh
# "Restart & Run All" reproduces the same samples (and therefore the same
# scores in every cell below).
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)

In [None]:
# Peek at the first few samples rather than echoing the whole 10,000-row array.
X[:5]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
splits = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = splits

In [None]:
# Scatter the training samples in feature space, coloured by class label.
fig, ax = plt.subplots()
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.RdYlBu)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Moons Dataset")
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Candidate values for the maximum number of leaves in the tree.
param_grid = {'max_leaf_nodes': [2,3,4,5]}

# Seed the estimator: scikit-learn permutes the features at every split, so
# splits with identical improvement can resolve differently between runs
# unless random_state is fixed.
tree = DecisionTreeClassifier(random_state=42)

# 5-fold cross-validated search over the grid, scored by accuracy.
grid_search = GridSearchCV(tree, param_grid, scoring="accuracy", return_train_score=True, cv=5)
grid_search.fit(X_train, y_train)

# Keep the winning hyperparameter for the cells below.
max_leaf_nodes = grid_search.best_params_['max_leaf_nodes']
print("Best max_leaf_nodes: ", max_leaf_nodes)


In [None]:
from sklearn.model_selection import cross_val_score
# Rebuild the tree with the selected max_leaf_nodes. The seed is fixed so that
# tie-breaking between equally good splits is repeatable across runs.
tree = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes, random_state=42)

# 10-fold cross-validated accuracy on the training split; the mean is the cell output.
scores = cross_val_score(tree, X_train, y_train, scoring="accuracy", cv=10)
scores.mean()

In [None]:
# Held-out evaluation: fit on the training split, then score on the untouched
# test split. Cross-validating on (X_test, y_test) would instead train fresh
# models on folds of the test data and never measure how the model learned
# from the training set generalises.
tree.fit(X_train, y_train)
tree.score(X_test, y_test)  # mean accuracy on the test set

In [None]:
from sklearn.model_selection import ShuffleSplit
n_trees = 100      # number of random subsets to draw (one tree is trained per subset)
train_size = 100   # number of training instances in each subset

# Random-subset generator over the training split. Each split also carries a
# 20-instance test part, which the later cells do not use.
rs = ShuffleSplit(
    n_splits=n_trees,
    train_size=train_size,
    test_size=20,
    random_state=42,
)
rs.get_n_splits(X_train)

In [None]:
# For every random subset, run 5-fold cross-validation of the tree on that
# subset alone and collect the per-fold accuracies.
accuracy_scores = [
    cross_val_score(
        tree,
        X_train[subset_index],
        y_train[subset_index],
        scoring="accuracy",
        cv=5,
    )
    for subset_index, _ in rs.split(X_train)
]

# Average over all folds of all subsets.
accuracy_mean = np.mean(accuracy_scores)
print("Accuracy: ", accuracy_mean)

In [None]:
# Refit the tree on each random subset and record its predictions for the whole
# test set: one row per subset, one column per test instance.
Y_pred = np.array(
    [
        tree.fit(X_train[subset_index], y_train[subset_index]).predict(X_test)
        for subset_index, _ in rs.split(X_train)
    ],
    dtype=np.uint8,
)

In [None]:
from scipy.stats import mode

# Majority vote: take the most frequent prediction down each column of Y_pred
# (i.e. across the per-subset trees), along with how many times it occurred.
vote_result = mode(Y_pred, axis=0)
y_pred_majority_votes, n_votes = vote_result

In [None]:
from sklearn.metrics import accuracy_score
# Flatten the voted predictions to 1-D before comparing them with the true test labels.
ensemble_predictions = y_pred_majority_votes.reshape(-1)
accuracy_score(y_test, ensemble_predictions)