In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

from skopt import gp_minimize
from skopt.space import Integer, Real
from skopt.utils import use_named_args
from skopt.plots import plot_convergence

# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X, y = data.data, data.target

# Keep 20% of the samples aside for the final evaluation; the fixed seed
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter dimensions explored by the optimizer. The ``name`` of each
# dimension is the XGBClassifier keyword argument it is passed as.
search_space = [
    Integer(low=50, high=300, name='n_estimators'),
    Integer(low=2, high=10, name='max_depth'),
    Real(low=0.01, high=0.3, prior='log-uniform', name='learning_rate'),
    Real(low=0.5, high=1.0, name='subsample'),
]

@use_named_args(search_space)
def objective(**params):
    """Return the negated mean 3-fold CV accuracy for one configuration.

    ``use_named_args`` maps the point supplied by the optimizer onto keyword
    arguments named after the dimensions of ``search_space``; those keywords
    are forwarded unchanged to ``XGBClassifier``.

    Returns:
        The mean cross-validation accuracy on the training split, negated
        so that a minimizer maximizes accuracy.
    """
    classifier = XGBClassifier(
        eval_metric='logloss',
        random_state=42,
        use_label_encoder=False,
        **params,
    )
    fold_accuracies = cross_val_score(
        classifier,
        X_train,
        y_train,
        scoring='accuracy',
        cv=3,
    )
    # The optimizer minimizes, so flip the sign of the accuracy.
    return -fold_accuracies.mean()

# Run Gaussian-process Bayesian optimization for 30 objective evaluations.
bo_result = gp_minimize(
    objective,
    search_space,
    n_calls=30,
    random_state=42,
)

# ``bo_result.x`` holds the best point in the same order as ``search_space``.
n_estimators_best, max_depth_best, learning_rate_best, subsample_best = bo_result.x
best_bo_params = {
    'n_estimators': n_estimators_best,
    'max_depth': max_depth_best,
    'learning_rate': learning_rate_best,
    'subsample': subsample_best,
}

print("Best Bayesian Optimization Parameters:", best_bo_params, sep="\n")

# Refit on the full training split with the best configuration and score
# it on the held-out test split.
bo_model = XGBClassifier(
    eval_metric='logloss',
    random_state=42,
    use_label_encoder=False,
    **best_bo_params,
)
bo_model.fit(X_train, y_train)

bo_preds = bo_model.predict(X_test)
bo_accuracy = accuracy_score(y_test, bo_preds)

print("Bayesian Optimization Accuracy:", bo_accuracy)

def random_search(n_iter=30, random_state=None):
    """Evaluate randomly sampled XGBoost configurations with 3-fold CV.

    Each iteration draws one configuration from the same ranges and priors
    that ``search_space`` declares for the Bayesian optimizer, so the two
    methods are compared on an identical search space.

    Args:
        n_iter: Number of random configurations to evaluate.
        random_state: Optional integer seed. When given, sampling uses a
            private ``np.random.RandomState`` so the run is reproducible;
            when ``None`` the global NumPy random state is used.

    Returns:
        A list with the mean cross-validation accuracy of every sampled
        configuration, in evaluation order.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Sampling uniformly in log space yields the log-uniform prior that
    # ``search_space`` uses for the learning rate.
    log_lr_low, log_lr_high = np.log(0.01), np.log(0.3)

    scores = []

    for _ in range(n_iter):
        params = {
            # ``randint`` excludes its upper bound; the +1 makes 300 and 10
            # reachable, matching the inclusive Integer dimensions.
            'n_estimators': int(rng.randint(50, 300 + 1)),
            'max_depth': int(rng.randint(2, 10 + 1)),
            'learning_rate': float(np.exp(rng.uniform(log_lr_low, log_lr_high))),
            'subsample': float(rng.uniform(0.5, 1.0)),
        }

        model = XGBClassifier(
            **params,
            eval_metric='logloss',
            random_state=42,
            use_label_encoder=False
        )

        score = cross_val_score(
            model, X_train, y_train,
            cv=3, scoring='accuracy'
        ).mean()

        scores.append(score)

    return scores

# Random-search baseline: best mean cross-validation accuracy over all draws.
random_scores = random_search()
random_best = max(random_scores)

print("Random Search Best Accuracy:", random_best)

# The objective returns negated accuracy, so negate it back. Take the running
# maximum for BOTH methods so the two curves show the same quantity
# (best CV accuracy found so far) and are directly comparable.
bo_running_best = np.maximum.accumulate(-np.asarray(bo_result.func_vals))
random_running_best = np.maximum.accumulate(random_scores)
bo_best_cv = bo_running_best[-1]

plt.figure(figsize=(8, 5))
plt.plot(bo_running_best, label="Bayesian Optimization")
plt.plot(random_running_best, label="Random Search")
plt.xlabel("Iterations")
plt.ylabel("Best CV Accuracy So Far")
plt.title("BO vs Random Search Convergence")
plt.legend()
plt.show()

# Compare like with like: both "best CV" numbers come from 3-fold
# cross-validation on the training split. The held-out test accuracy of the
# refitted BO model is reported separately because random search has no
# corresponding test-set evaluation.
print("\n===== FINAL COMPARISON =====")
print(f"Bayesian Optimization Best CV Accuracy: {bo_best_cv:.4f}")
print(f"Random Search Best CV Accuracy:         {random_best:.4f}")
print(f"Bayesian Optimization Test Accuracy:    {bo_accuracy:.4f}")
