Train and fine-tune a decision tree for the moons dataset by following the given steps

Step 1: Use make_moons to generate a moons dataset

In [1]:
from sklearn.datasets import make_moons

# Generate 10,000 noisy two-moons samples. The seed is fixed so that the
# dataset -- and therefore every score reported below -- is reproducible on
# Restart & Run All, consistent with the random_state=42 used elsewhere in
# this notebook.
X_moons, y_moons = make_moons(n_samples=10000, noise=0.4, random_state=42)

Step 2: Use train_test_split to split the dataset into a training set and a test set

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# identical from run to run.
(X_moons_train, X_moons_test,
 y_moons_train, y_moons_test) = train_test_split(
    X_moons,
    y_moons,
    random_state=42,
    test_size=0.2,
)

Step 3: Use grid search with cross-validation to find good hyperparameter values for a DecisionTreeClassifier

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Base classifier whose hyperparameters are tuned later; seeded so that
# repeated fits are reproducible.
tree_clf = DecisionTreeClassifier(random_state=42)

# Standardize the features, then project them onto their principal axes.
# The pipeline is fitted on the training set only and then applied as-is to
# the test set, so no test-set statistics influence the preprocessing.
pca_pipeline = make_pipeline(
    StandardScaler(),
    PCA(),
)
X_rotated_moons_train = pca_pipeline.fit_transform(X_moons_train)
X_rotated_moons_test = pca_pipeline.transform(X_moons_test)

In [12]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Discrete search space for the tree; the randomized search below samples
# 100 candidate combinations from it.
param_distrib = dict(
    max_depth=list(range(1, 10)),
    min_samples_split=[2, 3, 4],
    max_leaf_nodes=list(range(2, 100)),
)

# 5-fold cross-validated randomized search, fitted on the PCA-rotated
# training data. The seed fixes which candidates are sampled.
rnd_search_cv = RandomizedSearchCV(
    estimator=tree_clf,
    param_distributions=param_distrib,
    n_iter=100,
    cv=5,
    random_state=42,
)
rnd_search_cv.fit(X_rotated_moons_train, y_moons_train)
rnd_search_cv.best_estimator_

DecisionTreeClassifier(max_depth=8, max_leaf_nodes=57, random_state=42)

In [13]:
# Mean cross-validated score of the best candidate found by the randomized
# search (cv=5 folds on the training set).
rnd_search_cv.best_score_

0.8552500000000001

In [15]:
from sklearn.metrics import accuracy_score

# Evaluate the tuned tree on the held-out test set, which was transformed
# with the same train-fitted PCA pipeline as the training data.
y_pred = rnd_search_cv.best_estimator_.predict(X_rotated_moons_test)

# scikit-learn metrics expect (y_true, y_pred) in that order. Accuracy is
# symmetric so the value is unaffected, but the correct order keeps this cell
# right if the metric is later swapped for an asymmetric one (precision,
# recall, ...).
accuracy_score(y_moons_test, y_pred)

0.8545