In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

In [3]:
# Load the wine dataset bundle, then pull out the feature matrix and the
# class labels as separate names for the modelling cells below.
wine_data = load_wine()
X = wine_data.data
y = wine_data.target

In [4]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Hyper-parameter search space sampled by the randomized search.
#
# NOTE: 'auto' is intentionally not listed under max_features. For tree
# classifiers it was only an alias of 'sqrt'; scikit-learn deprecated it in
# 1.1 and removed it in 1.3, where every sampled candidate using it fails to
# fit and wastes a search iteration.
param_dist = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None]
}

In [6]:
# Base estimator handed to the hyper-parameter search. Fixing random_state
# makes the tree's own randomness (random splitter, feature sub-sampling)
# repeatable, so the search scores are reproducible across reruns.
dt_classifier = DecisionTreeClassifier(random_state=42)

In [7]:
# Randomized hyper-parameter search: 100 sampled candidates, each scored by
# 5-fold cross-validated accuracy; the seed fixes which candidates are drawn.
random_search = RandomizedSearchCV(
    estimator=dt_classifier,
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    scoring='accuracy',
    random_state=42,
)

In [8]:
# Run the search on the training split only; the test split stays untouched.
random_search.fit(X=X_train, y=y_train)



In [9]:
# Show the hyper-parameter combination selected by the search.
print(
    "Best Parameters:",
    random_search.best_params_,
)

Best Parameters: {'splitter': 'random', 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': None, 'criterion': 'gini'}


In [10]:
# Rebuild a tree from the winning hyper-parameters. random_state is pinned
# because the selected configuration may use splitter='random' or feature
# sub-sampling, which would otherwise give a different tree (and a different
# test accuracy) on every run.
best_dt_classifier = DecisionTreeClassifier(
    random_state=42,
    **random_search.best_params_,
)

In [11]:
# Train the tuned tree on the full training split.
best_dt_classifier.fit(X=X_train, y=y_train)

In [12]:
# Score the tuned tree on the held-out test split and report it.
accuracy = best_dt_classifier.score(X=X_test, y=y_test)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.8888888888888888


Random forest: decision trees trained on random subsets of the training set

In [19]:
from sklearn.model_selection import ShuffleSplit

In [20]:
# Splitter that yields 10 random train/test index partitions (20% held out
# in each); the seed makes the partitions repeatable.
shuffle_split = ShuffleSplit(
    n_splits=10,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Fit one tree per random subset of the training data, all sharing the
# hyper-parameters selected by the search.
tree_classifiers = []
for seed, (train_index, _) in enumerate(shuffle_split.split(X_train)):
    # Only the "train" side of each shuffle split is used; the other side is ignored.
    X_subset, y_subset = X_train[train_index], y_train[train_index]

    # A distinct, fixed seed per tree keeps results reproducible across reruns
    # while still letting the trees differ in their internal randomness.
    tree_classifier = DecisionTreeClassifier(
        random_state=seed,
        **random_search.best_params_,
    )
    tree_classifier.fit(X_subset, y_subset)

    tree_classifiers.append(tree_classifier)

In [22]:
# Test-set accuracy of every subset-trained tree, reported with 1-based numbering.
accuracies = [tree.score(X_test, y_test) for tree in tree_classifiers]
# The loop variable is deliberately not called `accuracy`, so the tuned
# tree's test accuracy stored under that name in an earlier cell is not
# overwritten.
for tree_number, tree_accuracy in enumerate(accuracies, start=1):
    print(f"Tree {tree_number} Accuracy: {tree_accuracy}")

Tree 1 Accuracy: 0.9444444444444444
Tree 2 Accuracy: 0.8333333333333334
Tree 3 Accuracy: 0.9166666666666666
Tree 4 Accuracy: 0.9166666666666666
Tree 5 Accuracy: 0.8611111111111112
Tree 6 Accuracy: 0.8611111111111112
Tree 7 Accuracy: 0.9166666666666666
Tree 8 Accuracy: 0.9166666666666666
Tree 9 Accuracy: 0.9444444444444444
Tree 10 Accuracy: 0.8888888888888888


In [23]:
# Re-score the single tuned tree on the test split so it can be compared
# against the per-subset trees above.
previous_accuracy = best_dt_classifier.score(X=X_test, y=y_test)
print(f"Previous Decision Tree Accuracy: {previous_accuracy}")

Previous Decision Tree Accuracy: 0.8888888888888888
