# In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

# Fetch the 784-feature MNIST table from OpenML as plain arrays
# (as_frame=False) rather than a DataFrame.
print("Downloading MNIST dataset (this may take a few minutes)...")
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
X = X / 255.0  # divide pixel values by 255 (presumably raw range is 0-255 -- confirm)
y = y.astype(int)  # the metrics and classifiers below work on integer labels
print("Dataset loaded successfully:", X.shape, y.shape)


# Hold out 20% of the samples for testing. Stratifying on the label keeps the
# per-digit class proportions the same in both partitions, so the weighted
# precision/recall/F1 scores are not skewed by an uneven random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features: the scaler is fitted on the training split only and then
# applied unchanged to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def evaluate_model(name, model, X_train, y_train, X_test, y_test):
    """Fit *model*, predict on the test split, and summarise the outcome.

    Args:
        name: Label stored under the "Model" key of the returned record.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``model.fit``.
        y_train: Training labels passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_test: True test labels the predictions are scored against.

    Returns:
        A dict with the keys "Model", "Accuracy", "Precision", "Recall",
        "F1" and "Time (s)". Precision, recall and F1 use weighted
        averaging. "Time (s)" is the combined fit + predict duration in
        seconds, rounded to two decimals.
    """
    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    elapsed = time.perf_counter() - start
    return {
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average="weighted"),
        "Recall": recall_score(y_test, y_pred, average="weighted"),
        "F1": f1_score(y_test, y_pred, average="weighted"),
        "Time (s)": round(elapsed, 2),
    }

# One record per classifier, appended in the order the models are run.
results = []

# Configure every estimator up front; nothing is fitted until the loop below.
svm_linear = SVC(kernel="linear", C=1)

# Polynomial-kernel SVM, hyper-parameters picked by 3-fold grid search.
params_poly = {"C": [0.5, 1], "degree": [2, 3], "coef0": [0.0, 1.0]}
svm_poly = GridSearchCV(SVC(kernel="poly", gamma="scale"), params_poly, cv=3, n_jobs=-1)

# RBF-kernel SVM, hyper-parameters picked by 3-fold grid search.
params_rbf = {"C": [1, 5], "gamma": ["scale", 0.01]}
svm_rbf = GridSearchCV(SVC(kernel="rbf"), params_rbf, cv=3, n_jobs=-1)

knn = KNeighborsClassifier(n_neighbors=3)
sgd = SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# (progress message, label used in the results table, estimator)
benchmark_plan = [
    ("\nTraining Linear SVM...", "SVM (Linear)", svm_linear),
    ("Training Polynomial SVM with Grid Search...", "SVM (Polynomial)", svm_poly),
    ("Training RBF SVM with Grid Search...", "SVM (RBF)", svm_rbf),
    ("\nTraining KNN...", "KNN (k=3)", knn),
    ("Training SGD...", "SGD", sgd),
    ("Training Random Forest...", "Random Forest", rf),
]

# Announce, fit and score each model in turn on the same train/test split.
for banner, label, estimator in benchmark_plan:
    print(banner)
    results.append(evaluate_model(label, estimator, X_train, y_train, X_test, y_test))

# Collect the per-model records into a table and echo it to stdout.
df_results = pd.DataFrame(results)
print("\nClassifier Comparison on MNIST:\n")
print(df_results)

# Bar chart of accuracy per model, drawn through explicit Figure/Axes handles.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(df_results["Model"], df_results["Accuracy"], color="skyblue")
plt.setp(ax.get_xticklabels(), rotation=45)  # tilt the model names on the x axis
ax.set_ylabel("Accuracy")
ax.set_title("Classifier Comparison on MNIST Dataset")
fig.tight_layout()
fig.savefig("mnist_comparison.png", dpi=300)  # write the file before showing it
plt.show()


# Persist the table next to the chart.
df_results.to_csv("mnist_results.csv", index=False)
print("\nResults saved as 'mnist_results.csv' and 'mnist_comparison.png'")


# Captured cell output (the log ends during the first model):
# Downloading MNIST dataset (this may take a few minutes)...
# Dataset loaded successfully: (70000, 784) (70000,)
#
# Training Linear SVM...
