# In [2]:
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn.metrics import accuracy_score

# Load MNIST from OpenML (dataset 'mnist_784', version 1) and hold out 20% of
# the samples as a test set; the fixed random_state keeps the split repeatable.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate values for each DecisionTreeClassifier hyperparameter. This one
# grid is passed to both grid_search() and random_search().
param_grid = {
    'max_depth': [10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy'],
}

def grid_search(param_grid):
    """Exhaustively search *param_grid* for the best decision tree settings.

    Every combination in the grid is scored by cross-validated accuracy
    (``cv=3``) on the module-level ``X_train`` / ``y_train``.

    Args:
        param_grid: Mapping of hyperparameter name to a list of candidates.

    Returns:
        A ``(best_params, best_score)`` tuple taken from the fitted
        ``GridSearchCV`` object.
    """
    searcher = GridSearchCV(
        estimator=DecisionTreeClassifier(),
        param_grid=param_grid,
        scoring='accuracy',
        cv=3,
    )
    searcher.fit(X_train, y_train)
    return searcher.best_params_, searcher.best_score_

def random_search(num_iterations, param_grid, random_state=None):
    """Randomly sample hyperparameter combinations and keep the best one.

    Each iteration draws one candidate per hyperparameter (independently and
    with replacement, so combinations may repeat), builds a
    ``DecisionTreeClassifier`` from them and scores it by the mean
    cross-validated accuracy (``cv=3``) on the module-level ``X_train`` /
    ``y_train``.

    Args:
        num_iterations: Number of random combinations to evaluate. With ``0``
            nothing is evaluated and ``({}, -inf)`` is returned.
        param_grid: Mapping of hyperparameter name to a sequence of candidates.
        random_state: Optional seed for reproducible sampling. When ``None``
            (the default) the global ``np.random`` state is used, as before.

    Returns:
        A ``(best_params, best_score)`` tuple. The values in ``best_params``
        are the original objects from ``param_grid``.
    """
    # Unseeded calls keep drawing from the global NumPy RNG; a seed gets its
    # own private generator so it does not disturb that global state.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    best_score = -np.inf
    best_params = {}

    for _ in range(num_iterations):
        # Pick by index instead of np.random.choice(values): choice() coerces
        # the list to an ndarray and hands back NumPy scalars (np.int64,
        # np.str_), which then leak into the reported best parameters.
        params = {
            name: values[rng.randint(len(values))]
            for name, values in param_grid.items()
        }
        model = DecisionTreeClassifier(**params)
        score = np.mean(cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy'))

        if score > best_score:
            best_score = score
            best_params = params

    return best_params, best_score

# --- Hyperparameter search -------------------------------------------------
# Random search runs first, then grid search; the order is kept because both
# draw on shared global state.

# Random Search
best_params_random, best_score_random = random_search(
    100,
    param_grid,
)
print("Random Search - Best Parameters:", best_params_random)
print("Random Search - Best Accuracy:", best_score_random)

# Grid Search
best_params_grid, best_score_grid = grid_search(
    param_grid,
)
print("Grid Search - Best Parameters:", best_params_grid)
print("Grid Search - Best Accuracy:", best_score_grid)

# --- Held-out evaluation ---------------------------------------------------
# Refit a fresh tree on the full training split with each winning
# configuration and score it on the test split.

# Model with best parameters from Random Search
model_random = DecisionTreeClassifier(**best_params_random)
model_random.fit(X_train, y_train)
predictions_random = model_random.predict(X_test)
accuracy_random = accuracy_score(y_test, predictions_random)
print("Test Accuracy with Best Parameters from Random Search:", accuracy_random)

# Model with best parameters from Grid Search
model_grid = DecisionTreeClassifier(**best_params_grid)
model_grid.fit(X_train, y_train)
predictions_grid = model_grid.predict(X_test)
accuracy_grid = accuracy_score(y_test, predictions_grid)
print("Test Accuracy with Best Parameters from Grid Search:", accuracy_grid)

def objective(criterion, max_depth, min_samples_split, min_samples_leaf):
    """Score one point of the continuous search space for the optimizer.

    The numeric inputs are mapped onto valid ``DecisionTreeClassifier``
    settings: the three size parameters are truncated with ``int()`` and
    ``criterion`` is thresholded at 0.5 ("gini" below, "entropy" otherwise).

    Args:
        criterion: Number selecting the split criterion via the 0.5 threshold.
        max_depth: Numeric tree depth, truncated to an integer.
        min_samples_split: Numeric split threshold, truncated to an integer.
        min_samples_leaf: Numeric leaf size, truncated to an integer.

    Returns:
        Mean cross-validated accuracy (``cv=3``) on the module-level
        ``X_train`` / ``y_train``.
    """
    depth = int(max_depth)
    split_size = int(min_samples_split)
    leaf_size = int(min_samples_leaf)

    if criterion < 0.5:
        split_criterion = "gini"
    else:
        split_criterion = "entropy"

    tree = DecisionTreeClassifier(
        criterion=split_criterion,
        max_depth=depth,
        min_samples_split=split_size,
        min_samples_leaf=leaf_size,
    )

    fold_scores = cross_val_score(tree, X_train, y_train, cv=3, scoring="accuracy")
    return np.mean(fold_scores)

# Bayesian optimisation over a continuous box; objective() maps each sampled
# point onto valid decision tree settings before scoring it.
optimizer = BayesianOptimization(
    f=objective,
    pbounds={
        "criterion": (0, 1),
        "max_depth": (10, 50),
        "min_samples_split": (2, 10),
        "min_samples_leaf": (1, 4)
    },
    random_state=42,
    verbose=2
)

optimizer.maximize(n_iter=25, init_points=5)

# The optimizer reports the best point in its own continuous encoding. Decode
# it with the same rules objective() applies (0.5 threshold for the criterion,
# int() truncation for the sizes) into a fresh dict, so that the parameters
# printed below are the ones actually used to fit the final model.
raw_params_bayesian = optimizer.max['params']
params_bayesian = {
    'criterion': "gini" if raw_params_bayesian['criterion'] < 0.5 else "entropy",
    'max_depth': int(raw_params_bayesian['max_depth']),
    'min_samples_split': int(raw_params_bayesian['min_samples_split']),
    'min_samples_leaf': int(raw_params_bayesian['min_samples_leaf']),
}

print("Bayesian Optimization - Best Parameters:", params_bayesian)

# Refit on the full training split with the decoded best parameters.
model_bayesian = DecisionTreeClassifier(**params_bayesian)
model_bayesian.fit(X_train, y_train)

# Evaluation
accuracy_bayesian = accuracy_score(y_test, model_bayesian.predict(X_test))

print("Test Accuracy with Best Parameters from Bayesian Optimization:", accuracy_bayesian)


# Notebook output (truncated):
#   warn(
#
# KeyboardInterrupt: