In [12]:
# Import necessary libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [15]:
# Task 1: Load the wine dataset
wine_data = load_wine()
X, y = wine_data.data, wine_data.target



In [16]:
# Task 2: Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Step 3: Use RandomizedSearchCV for hyperparameter tuning
# Define the hyperparameter grid
param_dist = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

In [18]:
# Create a Decision Tree Classifier
dt_classifier = DecisionTreeClassifier()

# Use RandomizedSearchCV for hyperparameter tuning
random_search = RandomizedSearchCV(dt_classifier, param_distributions=param_dist, n_iter=100, cv=5, scoring='accuracy', n_jobs=-1, random_state=42)
random_search.fit(X_train, y_train)

# Print the best hyperparameters found by RandomizedSearchCV
print("Best Hyperparameters:", random_search.best_params_)

# Step 4: Train the Decision Tree with the best hyperparameters
best_dt_classifier = random_search.best_estimator_
best_dt_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = best_dt_classifier.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Check if the accuracy meets the target of at least 85%
if accuracy >= 0.85:
    print("Achieved the target accuracy!")
else:
    print("Did not achieve the target accuracy.")

Best Hyperparameters: {'splitter': 'best', 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 30, 'criterion': 'entropy'}
Accuracy: 0.9444444444444444
Achieved the target accuracy!


In [20]:
from sklearn.model_selection import ShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import mode

# Step 1: Create 10 subsets of the training dataset using ShuffleSplit
num_subsets = 10
shuffle_split = ShuffleSplit(n_splits=num_subsets, test_size=0.2, random_state=42)

# Initialize a list to store the trained decision trees
forest_trees = []

for train_index, _ in shuffle_split.split(X_train):
    # Step 2: Train 1 decision tree on each subset
    subset_X_train, subset_y_train = X_train[train_index], y_train[train_index]

    # Create a Decision Tree Classifier with the best hyperparameters
    tree_classifier = DecisionTreeClassifier(**random_search.best_params_)

    # Train the decision tree on the subset
    tree_classifier.fit(subset_X_train, subset_y_train)

    # Add the trained tree to the list
    forest_trees.append(tree_classifier)

# Step 3: Evaluate all the trees on the test dataset
# Initialize an array to store predictions from each tree
forest_predictions = []

# Make predictions for each tree in the forest
for tree in forest_trees:
    tree_pred = tree.predict(X_test)
    forest_predictions.append(tree_pred)

# Combine the predictions using majority voting (mode)
ensemble_predictions = np.array(forest_predictions).T
final_predictions = mode(ensemble_predictions, axis=1).mode.flatten()

# Evaluate the performance of the random forest
forest_accuracy = accuracy_score(y_test, final_predictions)
print("Random Forest Accuracy:", forest_accuracy)

# Compare with the previous Decision Tree accuracy
print("Previous Decision Tree Accuracy:", accuracy)

# Check if the random forest performs better than the previous Decision Tree
if forest_accuracy > accuracy:
    print("Random Forest performs better than the previous Decision Tree.")
else:
    print("Random Forest does not outperform the previous Decision Tree.")


Random Forest Accuracy: 0.9444444444444444
Previous Decision Tree Accuracy: 0.9444444444444444
Random Forest does not outperform the previous Decision Tree.
