  Building an SVM Classifier for MNIST with Hyperparameter Tuning and Comparative Analysis
  

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
  </td>
  <td>
    <a target="_blank" href="https://kaggle.com/kernels/welcome?src=https://github.com/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" /></a>
  </td>
</table>

Training an SVM Classifier on the MNIST Dataset

In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import time

# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"].astype(int)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an initial SVM classifier using the default kernel (RBF)
svm_clf = SVC(kernel='rbf', random_state=42)
start_time = time.time()
svm_clf.fit(X_train, y_train)
training_time = time.time() - start_time

# Predict and evaluate performance
y_pred = svm_clf.predict(X_test)
print(f"SVM (RBF) Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Training Time: {training_time:.2f} seconds")
print(classification_report(y_test, y_pred))

SVM (RBF) Accuracy: 0.9764
Training Time: 314.49 seconds
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1343
           1       0.98      0.99      0.99      1600
           2       0.97      0.98      0.97      1380
           3       0.97      0.97      0.97      1433
           4       0.97      0.98      0.98      1295
           5       0.98      0.97      0.97      1273
           6       0.98      0.99      0.99      1396
           7       0.97      0.97      0.97      1503
           8       0.97      0.96      0.97      1357
           9       0.97      0.96      0.97      1420

    accuracy                           0.98     14000
   macro avg       0.98      0.98      0.98     14000
weighted avg       0.98      0.98      0.98     14000



 Hyperparameter values for the SVM classifier.
  Polynomial Kernel: Optimizing degree, C, and coef0

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import time

# Load the MNIST dataset with error handling and data caching
try:
    mnist = fetch_openml('mnist_784', version=1, as_frame=False, cache=True)  # Cache data to avoid re-downloading
except KeyboardInterrupt:
    print("Download interrupted. Please try again later.")
except Exception as e:
    print(f"An error occurred: {e}")

# Continue processing if download was successful
if 'mnist' in locals():
    X, y = mnist["data"], mnist["target"].astype(int)

    # Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train an initial SVM classifier using the default kernel (RBF)
    svm_clf = SVC(kernel='rbf', random_state=42)
    start_time = time.time()
    svm_clf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Predict and evaluate performance
    y_pred = svm_clf.predict(X_test)
    print(f"SVM (RBF) Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"Training Time: {training_time:.2f} seconds")
    print(classification_report(y_test, y_pred))

    # Define parameter grid for Polynomial kernel
    param_grid_poly = {
        'kernel': ['poly'],
        'C': [0.1, 1, 10],
        'degree': [2, 3, 4],
        'coef0': [0, 1, 10]
    }

    # Perform grid search for polynomial kernel
    svm_poly = SVC()
    grid_search_poly = GridSearchCV(svm_poly, param_grid_poly, cv=5, scoring='accuracy')
    start_time_poly = time.time()
    grid_search_poly.fit(X_train, y_train)  # X_train and y_train are now defined
    training_time_poly = time.time() - start_time_poly

    # Print best parameters and accuracy for Polynomial kernel
    print(f"Best Parameters (Polynomial Kernel): {grid_search_poly.best_params_}")
    print(f"Best Accuracy (Polynomial Kernel): {grid_search_poly.best_score_:.4f}")
    print(f"Training Time (Polynomial Kernel): {training_time_poly:.2f} seconds")



SVM (RBF) Accuracy: 0.9764
Training Time: 281.37 seconds
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1343
           1       0.98      0.99      0.99      1600
           2       0.97      0.98      0.97      1380
           3       0.97      0.97      0.97      1433
           4       0.97      0.98      0.98      1295
           5       0.98      0.97      0.97      1273
           6       0.98      0.99      0.99      1396
           7       0.97      0.97      0.97      1503
           8       0.97      0.96      0.97      1357
           9       0.97      0.96      0.97      1420

    accuracy                           0.98     14000
   macro avg       0.98      0.98      0.98     14000
weighted avg       0.98      0.98      0.98     14000



In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV # Import GridSearchCV here
import time
from sklearn.datasets import fetch_openml # import fetch_openml to load the data
from sklearn.model_selection import train_test_split # import train_test_split to split the data


# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"].astype(int)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

param_grid_rbf = {
    'kernel': ['rbf'],
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto']
}

# Perform grid search for RBF kernel
svm_rbf = SVC()
grid_search_rbf = GridSearchCV(svm_rbf, param_grid_rbf, cv=5, scoring='accuracy')
start_time_rbf = time.time()
grid_search_rbf.fit(X_train, y_train)  # Assuming X_train and y_train are defined elsewhere
training_time_rbf = time.time() - start_time_rbf

# Print best parameters and accuracy for RBF kernel
print(f"Best Parameters (RBF Kernel): {grid_search_rbf.best_params_}")
print(f"Best Accuracy (RBF Kernel): {grid_search_rbf.best_score_:.4f}")
print(f"Training Time (RBF Kernel): {training_time_rbf:.2f} seconds")

In [None]:
# Import GridSearchCV at the top of this file
from sklearn.model_selection import GridSearchCV, StratifiedKFold
# Linear Kernel
from sklearn.svm import SVC
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train, y_train)
y_pred_linear = svm_linear.predict(X_test)

# Define parameter grid for Polynomial kernel (ADD THIS)
param_grid_poly = {
    'kernel': ['poly'],
    'C': [0.1, 1, 10],
    'degree': [2, 3, 4],
    'coef0': [0, 1, 10]
}
# Define parameter grid for RBF kernel
param_grid_rbf = {
    'kernel': ['rbf'],
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto']
}

# Perform grid search for RBF kernel
svm_rbf = SVC()
# Use StratifiedKFold with a lower n_splits value for RBF kernel as well
cv_rbf = StratifiedKFold(n_splits=2) # Reduced n_splits to 2 or any value less than your minimum class count
grid_search_rbf = GridSearchCV(svm_rbf, param_grid_rbf, cv=cv_rbf, scoring='accuracy') # Changed cv=5 to cv=cv_rbf
grid_search_rbf.fit(X_train, y_train)  # Assuming X_train and y_train are defined elsewhere
# Polynomial Kernel (best from grid search)
svm_poly = SVC()
# Explicitly use StratifiedKFold with a lower n_splits value
cv_poly = StratifiedKFold(n_splits=2) # Reduced n_splits to 2 or any value less than your minimum class count
grid_search_poly = GridSearchCV(svm_poly, param_grid_poly, cv=cv_poly, scoring='accuracy') # Changed cv=cv to cv=cv_poly for clarity
start_time_poly = time.time()
grid_search_poly.fit(X_train, y_train)
training_time_poly = time.time() - start_time_poly
y_pred_poly = grid_search_poly.predict(X_test)


# RBF Kernel (best from grid search)
best_rbf = grid_search_rbf.best_estimator_
y_pred_rbf = best_rbf.predict(X_test)

# Print classification reports for all kernels
print("Performance for Linear Kernel")
print(classification_report(y_test, y_pred_linear))

print("Performance for Polynomial Kernel")
print(classification_report(y_test, y_pred_poly))

print("Performance for RBF Kernel")
print(classification_report(y_test, y_pred_rbf))

 SVM with different kernels (Linear, Polynomial, and RBF)

In [13]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)
start_time_knn = time.time()
knn.fit(X_train, y_train)
training_time_knn = time.time() - start_time_knn

y_pred_knn = knn.predict(X_test)
print(f"KNN Accuracy: {accuracy_score(y_test, y_pred_knn):.4f}")
print(f"Training Time (KNN): {training_time_knn:.2f} seconds")
print(classification_report(y_test, y_pred_knn))

KNN Accuracy: 0.0000
Training Time (KNN): 0.00 seconds
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier(random_state=42)
start_time_sgd = time.time()
sgd.fit(X_train, y_train)
training_time_sgd = time.time() - start_time_sgd

y_pred_sgd = sgd.predict(X_test)
print(f"SGD Accuracy: {accuracy_score(y_test, y_pred_sgd):.4f}")
print(f"Training Time (SGD): {training_time_sgd:.2f} seconds")
print(classification_report(y_test, y_pred_sgd))

SGD Accuracy: 1.0000
Training Time (SGD): 0.01 seconds
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report # Import classification_report

rf = RandomForestClassifier(random_state=42)
start_time_rf = time.time()
rf.fit(X_train, y_train)
training_time_rf = time.time() - start_time_rf

y_pred_rf = rf.predict(X_test)
print(f"Random Forest Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print(f"Training Time (Random Forest): {training_time_rf:.2f} seconds")
print(classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.0000
Training Time (Random Forest): 0.41 seconds
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Comparison


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split  # Import for splitting data
from sklearn.svm import SVC, LinearSVC # Import the LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import time


# Assume you have your features in 'X' and target in 'y'
# Replace 'X' and 'y' with your actual variable names
# **Replace these with your actual data**
X = [[1, 2], [3, 4], [5, 6], [7, 8]]  # Example features data - Replace with your data!
y = [0, 1, 0, 1]  # Example target variable data - Replace with your data!

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Adjust test_size and random_state as needed

# Linear SVM
linear_svm = LinearSVC(random_state=42)  # Initialize LinearSVC
start_time = time.time() # Capture the start time
linear_svm.fit(X_train, y_train)  # Train the model
training_time = time.time() - start_time # Calculate the training time
y_pred_linear = linear_svm.predict(X_test)  # Predict on the test set


# ... (Your existing code for other classifiers, like SVM (Poly), SVM (RBF), KNN, SGD, Random Forest)...
# Example for SVM (Poly) - Adapt for other classifiers as needed:

# Assuming you have code for SVM (Poly), SVM (RBF), KNN, SGD, and Random Forest
# Make sure to define y_pred_poly, y_pred_rbf, y_pred_knn, y_pred_sgd, and y_pred_rf accordingly.

#Import necessary libraries for other classification models
from sklearn.svm import SVC

#SVM with polynomial kernel
poly_svm = SVC(kernel='poly', degree=3, random_state=42)  # Initialize polynomial SVM
start_time_poly = time.time() # Capture the start time
poly_svm.fit(X_train, y_train) # Train the model
training_time_poly = time.time() - start_time_poly # Calculate the training time
y_pred_poly = poly_svm.predict(X_test) # Predict on the test set

#SVM with RBF kernel
rbf_svm = SVC(kernel='rbf', random_state=42)  # Initialize RBF SVM
start_time_rbf = time.time() # Capture the start time
rbf_svm.fit(X_train, y_train) # Train the model
training_time_rbf = time.time() - start_time_rbf # Calculate the training time
y_pred_rbf = rbf_svm.predict(X_test) # Predict on the test set

#KNN Classifier
knn = KNeighborsClassifier(n_neighbors=3) # Initialize KNN classifier
start_time_knn = time.time() # Capture the start time
knn.fit(X_train, y_train) # Train the model
training_time_knn = time.time() - start_time_knn # Calculate the training time
y_pred_knn = knn.predict(X_test) # Predict on the test set

#SGD Classifier
sgd = SGDClassifier(random_state=42)  # Initialize SGD classifier
start_time_sgd = time.time() # Capture the start time
sgd.fit(X_train, y_train) # Train the model
training_time_sgd = time.time() - start_time_sgd # Calculate the training time
y

