In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import cifar10
from skopt import BayesSearchCV
from skopt.space import Categorical
from cuml.ensemble import RandomForestClassifier as cuRF
from cuml.svm import SVC as cuSVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.pipeline import Pipeline
import cupy as cp

In [None]:
# Load the CIFAR-10 dataset.
# load_data() yields a (train, test) pair, each an (images, labels) tuple;
# flatten that nesting straight into the four arrays used below.
X_train, y_train, X_test, y_test = (
    array for split in cifar10.load_data() for array in split
)

In [None]:
# Flatten each image into one feature vector and standardize the features.
#
# The arrays are deliberately kept on the host (NumPy): the scikit-learn /
# scikit-optimize cross-validation machinery used below converts its inputs
# with NumPy, and CuPy arrays refuse implicit conversion to NumPy. cuML
# estimators accept host arrays and copy them to the GPU themselves.
X_train = X_train.reshape(len(X_train), -1).astype(np.float32)
X_test = X_test.reshape(len(X_test), -1).astype(np.float32)

scaler = StandardScaler()
# Scaling statistics come from the training split only, then are reused on test.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Flatten the label arrays to 1-D integer vectors.
y_train = np.asarray(y_train).reshape(-1).astype(np.int32)
y_test = np.asarray(y_test).reshape(-1).astype(np.int32)

In [None]:
# # Hyperparameter search spaces
# rf_param_grid = {
#     "n_estimators": [50, 100, 200, 300, 500],
#     "criterion": ["gini", "entropy"],
#     "max_depth": [None, 3, 5, 10, 20, 30],
#     "min_samples_leaf": [1, 2, 4, 8, 16],
#     "max_features": ["sqrt", "log2", 0.25, 0.5],
#     "bootstrap": [True, False],
#     "min_samples_split": [2, 5, 10],
#     "class_weight": [None, "balanced", "balanced_subsample"],
# }
# svm_param_grid = {
#     "C": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000],
#     "kernel": ["linear", "rbf", "poly", "sigmoid"],
#     "gamma": ["scale", "auto", 0.001, 0.01, 0.1, 1, 10],
#     "degree": [2, 3, 4],
#     "coef0": [0.0, 0.1, 0.5, 1],
#     "class_weight": [None, "balanced"],
# }

# # Ensemble hyperparameter search space
# ensemble_param_grid = {
#     "voting": ["soft", "hard"],
#     "weights": [None, [0.5, 0.5], [0.3, 0.7], [0.7, 0.3]],
#     "rf__n_estimators": [100, 200, 300],
#     "rf__max_depth": [None, 10, 20],
#     "svm__C": [0.1, 1, 10],
#     "svm__gamma": ["scale", "auto", 0.01],
# }

# Hyperparameter search spaces (Reduced for faster testing)
#
# Every space is wrapped in Categorical so the listed values are searched as
# discrete choices: a bare two-element numeric list such as [2, 5] is
# interpreted by skopt as (low, high) bounds of a range, not as two options.
rf_param_grid = {
    "n_estimators": Categorical([50, 100, 200]),
    # cuML's random forest requires an integer depth (its default is 16);
    # None ("unlimited") is not a valid value there.
    "max_depth": Categorical([10, 16, 20]),
    "min_samples_split": Categorical([2, 5]),
    "max_features": Categorical(["sqrt", 0.5]),
}

svm_param_grid = {
    "C": Categorical([0.1, 1, 10]),
    "kernel": Categorical(["linear", "rbf"]),
    "gamma": Categorical(["scale", "auto"]),
}

# The ensemble is searched through a Pipeline whose only step is named
# "voting", so every parameter needs the "voting__" step prefix. Without it,
# "voting" would replace the pipeline step itself and "rf__..." / "svm__..."
# would be rejected as unknown Pipeline parameters.
#
# "weights" is intentionally not searched: with two estimators, None and
# [0.5, 0.5] both weight the members equally, so the choice has no effect.
ensemble_param_grid = {
    "voting__voting": Categorical(["soft", "hard"]),
    "voting__rf__n_estimators": Categorical([100, 200]),
    "voting__rf__max_depth": Categorical([10, 16]),
    "voting__svm__C": Categorical([0.1, 1]),
    "voting__svm__gamma": Categorical(["scale", "auto"]),
}

In [None]:
# # Tune Random Forest (cuML)
# rf = cuRF(random_state=42)
# bayes_search_rf = BayesSearchCV(estimator=rf, search_spaces=rf_param_grid, cv=3, random_state=42) 
# bayes_search_rf.fit(X_train_scaled, y_train)

# Tune Random Forest (reduced iterations for testing):
# 15 Bayesian-search iterations, each scored with 3-fold cross-validation.
rf = cuRF(random_state=42)
bayes_search_rf = BayesSearchCV(
    estimator=rf,
    search_spaces=rf_param_grid,
    n_iter=15,
    cv=3,
    random_state=42,
)
bayes_search_rf.fit(X_train_scaled, y_train)

In [None]:
# # Tune SVM (cuML)
# svm = cuSVC(probability=True, random_state=42)
# bayes_search_svm = BayesSearchCV(estimator=svm, search_spaces=svm_param_grid, cv=3, random_state=42) 
# bayes_search_svm.fit(X_train_scaled, y_train)

# Tune SVM (reduced iterations for testing).
# probability=True so the fitted model exposes predict_proba for the ROC plots.
svm = cuSVC(probability=True, random_state=42)
bayes_search_svm = BayesSearchCV(
    estimator=svm,
    search_spaces=svm_param_grid,
    n_iter=15,  # n_iter reduced
    cv=3,
    random_state=42,
)
bayes_search_svm.fit(X_train_scaled, y_train)

In [None]:
# Build fresh (unfitted) base models from the best parameters found by each
# individual search; these become the members of the voting ensemble.
best_rf = cuRF(random_state=42, **bayes_search_rf.best_params_)
best_svm = cuSVC(
    probability=True,
    random_state=42,
    **bayes_search_svm.best_params_,
)

In [None]:
# Wrap a two-member voting classifier (tuned RF + tuned SVM) in a single-step
# pipeline; the step is named "voting" and is looked up by that name later.
ensemble = Pipeline(
    steps=[
        (
            "voting",
            VotingClassifier(estimators=[("rf", best_rf), ("svm", best_svm)]),
        ),
    ]
)

In [None]:
# # Tuning Ensemble 
# bayes_search_ensemble = BayesSearchCV(estimator=ensemble, search_spaces=ensemble_param_grid, cv=3, n_jobs=-1, random_state=42)
# bayes_search_ensemble.fit(X_train_scaled, y_train)
# print("Best Ensemble (BayesSearch) Hyperparameters:", bayes_search_ensemble.best_params_) 

# Tuning Ensemble (Reduced iterations for testing)
# NOTE(review): n_jobs=-1 runs cross-validation fits in parallel workers while
# the member estimators are GPU-backed; confirm the GPU has room for
# concurrent fits, otherwise drop n_jobs.
bayes_search_ensemble = BayesSearchCV(
    estimator=ensemble,
    search_spaces=ensemble_param_grid,
    n_iter=15,
    cv=3,
    n_jobs=-1,
    random_state=42,
)
bayes_search_ensemble.fit(X_train_scaled, y_train)
print("Best Ensemble (BayesSearch) Hyperparameters:", bayes_search_ensemble.best_params_)

# Pull the refitted voting classifier out of the winning pipeline.
best_ensemble = bayes_search_ensemble.best_estimator_.named_steps["voting"]

In [None]:
# --- Evaluation ---
def evaluate_model(model, X_test, y_test):
    """Print test accuracy and plot ROC curves and a confusion matrix.

    Args:
        model: Fitted classifier exposing ``predict``. ROC curves are drawn
            only if it also exposes ``predict_proba`` (a hard-voting
            ``VotingClassifier`` does not).
        X_test: Test features, as a NumPy or CuPy array.
        y_test: True test labels, as a NumPy or CuPy array.
    """
    # cp.asnumpy accepts both device and host arrays, so everything handed to
    # scikit-learn / matplotlib below is a plain NumPy array.
    y_true = cp.asnumpy(y_test)
    pred = cp.asnumpy(model.predict(X_test))

    accuracy = accuracy_score(y_true, pred)
    print(f"Accuracy: {accuracy:.4f}")

    if hasattr(model, "predict_proba"):
        y_score = cp.asnumpy(model.predict_proba(X_test))
        # One indicator column per class, matching the columns of y_score.
        y_test_bin = LabelBinarizer().fit_transform(y_true)
        plot_roc_curve(y_test_bin, y_score)
    else:
        # Without class probabilities there is nothing to sweep a threshold over.
        print("Skipping ROC curves: model does not provide predict_proba.")

    plot_confusion_matrix(y_true, pred)


def plot_roc_curve(y_test_bin, y_score):
    """Plot one-vs-rest ROC curves, one per class column, on a single figure.

    Args:
        y_test_bin: 2-D indicator array of the true labels, one column per
            class (e.g. the output of ``LabelBinarizer``).
        y_score: 2-D array of per-class scores. Both arrays must have one
            column per class, in the same order.
    """
    # Derive the class count from the data instead of assuming 10 classes.
    n_classes = y_test_bin.shape[1]

    fig, ax = plt.subplots(figsize=(10, 8))
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, i])
        ax.plot(fpr, tpr, label="Class {} (AUC = {:.2f})".format(i, auc(fpr, tpr)))

    # Diagonal = performance of a random classifier, for visual reference.
    ax.plot([0, 1], [0, 1], "k--")
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("Receiver Operating Characteristic (ROC) Curves")
    ax.legend(loc="lower right")
    plt.show()


def plot_confusion_matrix(y_test, pred):
    """Draw the confusion matrix of ``pred`` against ``y_test`` as a heatmap.

    Args:
        y_test: 1-D NumPy array of true labels.
        pred: 1-D NumPy array of predicted labels.
    """
    # Use the labels actually present (true or predicted) for both the matrix
    # and the tick labels, so the axes always match the matrix size instead of
    # assuming exactly ten classes.
    labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(pred)]))
    cm = confusion_matrix(y_test, pred, labels=labels)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    ax.set_title("Confusion Matrix")
    plt.show()

In [None]:

# Held-out evaluation of the SVM refitted with the best parameters found.
print("\n--- SVM (Best from BayesSearch) ---")
evaluate_model(
    model=bayes_search_svm.best_estimator_,
    X_test=X_test_scaled,
    y_test=y_test,
)

In [None]:
# Held-out evaluation of the random forest refitted with the best parameters found.
print("\n--- Random Forest (Best from BayesSearch) ---")
evaluate_model(
    model=bayes_search_rf.best_estimator_,
    X_test=X_test_scaled,
    y_test=y_test,
)

In [None]:
# Held-out evaluation of the voting classifier taken from the best pipeline.
print("\n--- Ensemble Model (Best from BayesSearch) ---")
evaluate_model(
    model=best_ensemble,
    X_test=X_test_scaled,
    y_test=y_test,
)