In [1]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt 
import gzip, os
import numpy as np
from scipy.stats import multivariate_normal
from sklearn import svm

if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

In [2]:
# Function that downloads a specified MNIST data file from a public mirror
# (by default the ossci-datasets S3 bucket).
def download(filename, source='https://ossci-datasets.s3.amazonaws.com/mnist/'):
    """Fetch ``source + filename`` and store it locally as ``filename``.

    The transfer is written to ``<filename>.part`` first and only moved to
    ``filename`` once it has completed. The loaders in this notebook skip the
    download whenever ``filename`` already exists, so a truncated file left at
    the final path by an interrupted transfer would never be re-fetched.

    Parameters:
    - filename: name of the remote file; also the local path that is written
    - source: URL prefix that ``filename`` is appended to

    Raises:
    - whatever ``urlretrieve`` raises; the partial file is removed first
    """
    print("Downloading %s" % filename)
    partial = filename + '.part'
    # os.replace only exists on Python 3.3+; fall back to os.rename elsewhere.
    move = getattr(os, 'replace', os.rename)
    try:
        urlretrieve(source + filename, partial)
        move(partial, filename)
    except BaseException:
        # BaseException so that a kernel interrupt (KeyboardInterrupt) in the
        # middle of a download also cleans up the partial file.
        if os.path.exists(partial):
            os.remove(partial)
        raise

# Invokes download() if necessary, then reads in images
def load_mnist_images(filename):
    """Load a gzip-compressed IDX image file as a 2-D uint8 array.

    The 16-byte header (four big-endian 32-bit integers: magic number, image
    count, rows, columns) is validated before the pixel bytes are reshaped,
    so a wrong or truncated file raises instead of yielding misaligned rows.

    Parameters:
    - filename: local path of the ``.gz`` file; downloaded first if missing

    Returns:
    - read-only array of shape (image count, rows * cols); (n, 784) for MNIST

    Raises:
    - ValueError: if the header is incomplete, the magic number is not 2051,
      or the payload size disagrees with the dimensions in the header
    """
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as f:
        raw = f.read()

    if len(raw) < 16:
        raise ValueError("%s: too short to hold an IDX image header" % filename)
    magic, count, rows, cols = (
        int(v) for v in np.frombuffer(raw, dtype='>i4', count=4)
    )
    # 2051 (0x00000803) marks an IDX file of unsigned bytes with 3 dimensions.
    if magic != 2051:
        raise ValueError("%s: unexpected magic number %d (expected 2051)"
                         % (filename, magic))

    data = np.frombuffer(raw, np.uint8, offset=16)
    pixels_per_image = rows * cols
    if data.size != count * pixels_per_image:
        raise ValueError("%s: header declares %d images of %dx%d but %d pixel bytes follow"
                         % (filename, count, rows, cols, data.size))
    return data.reshape(count, pixels_per_image)

def load_mnist_labels(filename):
    """Load a gzip-compressed IDX label file as a 1-D uint8 array.

    The 8-byte header (two big-endian 32-bit integers: magic number and item
    count) is validated before the label bytes are returned, so a wrong or
    truncated file raises instead of yielding a silently short label vector.

    Parameters:
    - filename: local path of the ``.gz`` file; downloaded first if missing

    Returns:
    - read-only array with one label byte per item

    Raises:
    - ValueError: if the header is incomplete, the magic number is not 2049,
      or the number of label bytes disagrees with the count in the header
    """
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as f:
        raw = f.read()

    if len(raw) < 8:
        raise ValueError("%s: too short to hold an IDX label header" % filename)
    magic, count = (int(v) for v in np.frombuffer(raw, dtype='>i4', count=2))
    # 2049 (0x00000801) marks an IDX file of unsigned bytes with 1 dimension.
    if magic != 2049:
        raise ValueError("%s: unexpected magic number %d (expected 2049)"
                         % (filename, magic))

    data = np.frombuffer(raw, np.uint8, offset=8)
    if data.size != count:
        raise ValueError("%s: header declares %d labels but %d label bytes follow"
                         % (filename, count, data.size))
    return data

In [6]:
## Load the training set (each file is downloaded on first use)
train_data = load_mnist_images('train-images-idx3-ubyte.gz')
train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')

## Load the testing set
test_data = load_mnist_images('t10k-images-idx3-ubyte.gz')
test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

## Sanity checks: every image needs exactly one label, and both splits must
## share the same feature width, otherwise the classifiers below would fit
## or predict on mismatched arrays.
assert train_data.shape[0] == train_labels.shape[0], "train images/labels count mismatch"
assert test_data.shape[0] == test_labels.shape[0], "test images/labels count mismatch"
assert train_data.shape[1] == test_data.shape[1], "train/test feature width mismatch"

In [3]:
def fit_linear_svc(C_value=1.0, max_iter=1000, random_state=None):
    """Fit a hinge-loss LinearSVC on the global training set and score it.

    Reads the module-level ``train_data``, ``train_labels``, ``test_data``
    and ``test_labels`` arrays.

    Parameters:
    - C_value: regularisation parameter ``C`` passed to ``svm.LinearSVC``
    - max_iter: iteration cap passed to ``svm.LinearSVC``; the default of 1000
      is the library default, so existing calls behave as before
    - random_state: seed passed to ``svm.LinearSVC``; set an integer for
      repeatable runs (``None`` keeps the previous unseeded behaviour)

    Returns:
    - train_preds, test_preds, train_error, test_error, where each error is
      the fraction of misclassified samples as a Python float
    """
    ## Fit SVM classifier on train data
    # NOTE(review): the pixel features are passed in unscaled; if the solver
    # emits convergence warnings, raising max_iter or scaling the inputs is
    # the usual remedy. Confirm against the run logs.
    clf = svm.LinearSVC(C=C_value, loss='hinge',
                        max_iter=max_iter, random_state=random_state)
    clf.fit(train_data, train_labels)

    ## Get predictions on training data
    train_preds = clf.predict(train_data)
    # np.mean of the boolean mismatch mask is the error rate; unlike
    # np.sum(...) / len(...) it cannot floor-divide to 0 under Python 2.
    train_error = float(np.mean(train_preds != train_labels))

    ## Get predictions on test data
    test_preds = clf.predict(test_data)
    test_error = float(np.mean(test_preds != test_labels))

    return train_preds, test_preds, train_error, test_error

In [None]:

# Sweep the regularisation strength C over five orders of magnitude and
# report the train/test error rate that fit_linear_svc returns for each.
cvals = [0.01, 0.1, 1.0, 10.0, 100.0]
report_template = "Error rate for C = %0.2f: train %0.3f test %0.3f"

for c in cvals:
    # Predictions are returned as well, but only the error rates are printed.
    train_preds, test_preds, train_error, test_error = fit_linear_svc(c)
    print(report_template % (c, train_error, test_error))



Error rate for C = 0.01: train 0.141 test 0.146




In [4]:
def fit_svm_classifier(classifier, X_train=None, y_train=None, X_test=None, y_test=None):
    """
    Generic function to fit any SVM classifier and evaluate performance

    Parameters:
    - classifier: sklearn SVM classifier instance (LinearSVC, SVC, etc.);
      any object with ``fit(X, y)`` and ``predict(X)`` works
    - X_train, y_train: training features and labels; each defaults to the
      module-level ``train_data`` / ``train_labels`` when left as None
    - X_test, y_test: test features and labels; each defaults to the
      module-level ``test_data`` / ``test_labels`` when left as None

    Returns:
    - train_preds, test_preds, train_error, test_error, where each error is
      the fraction of misclassified samples as a Python float
    """
    # Fall back to the notebook globals only for arguments that were omitted,
    # so existing one-argument calls behave exactly as before.
    X_train = train_data if X_train is None else X_train
    y_train = train_labels if y_train is None else y_train
    X_test = test_data if X_test is None else X_test
    y_test = test_labels if y_test is None else y_test

    ## Fit SVM classifier on train data
    classifier.fit(X_train, y_train)

    ## Get predictions on training data
    train_preds = classifier.predict(X_train)
    # np.mean of the boolean mismatch mask is the error rate; unlike
    # np.sum(...) / len(...) it cannot floor-divide to 0 under Python 2.
    train_error = float(np.mean(train_preds != y_train))

    ## Get predictions on test data
    test_preds = classifier.predict(X_test)
    test_error = float(np.mean(test_preds != y_test))

    return train_preds, test_preds, train_error, test_error

In [7]:
# Linear SVM sweep driven by the generic fit_svm_classifier helper:
# one LinearSVC per C value, a per-C record, then a summary table.
print("MNIST SVM EXPERIMENTS")
print("=" * 60)

# Candidate values for the regularisation parameter C
C_values = [0.01, 0.1, 1.0, 10.0, 100.0]
linear_results = []

print("\n1. LINEAR SVM WITH DIFFERENT C VALUES:")
print("-" * 40)
for C in C_values:
    print(f"Testing Linear SVM with C = {C}")

    # Hinge-loss linear classifier for the current C
    linear_clf = svm.LinearSVC(C=C, loss='hinge', max_iter=1000)
    train_preds, test_preds, train_error, test_error = fit_svm_classifier(linear_clf)

    # One record per C; the kernel-comparison cell looks these up by 'C'
    linear_results.append(dict(C=C, train_error=train_error, test_error=test_error))

    print(f"  Train Error: {train_error:.4f}, Test Error: {test_error:.4f}")

# Recap of every run in the order it was executed
print(f"\n{'='*60}")
print("LINEAR SVM SUMMARY:")
for result in linear_results:
    print(f"C = {result['C']:6.2f}: Train = {result['train_error']:.4f}, Test = {result['test_error']:.4f}")

# Pick the record with the lowest test error
# NOTE(review): this selects C on the test set itself, so the reported best
# test error is optimistic; a held-out validation split would avoid that.
best_result = min(linear_results, key=lambda entry: entry['test_error'])
print(f"\n🎯 Best C value: {best_result['C']} (Test Error: {best_result['test_error']:.4f})")

MNIST SVM EXPERIMENTS

1. LINEAR SVM WITH DIFFERENT C VALUES:
----------------------------------------
Testing Linear SVM with C = 0.01




  Train Error: 0.1315, Test Error: 0.1386
Testing Linear SVM with C = 0.1




  Train Error: 0.1308, Test Error: 0.1345
Testing Linear SVM with C = 1.0




  Train Error: 0.1481, Test Error: 0.1550
Testing Linear SVM with C = 10.0




  Train Error: 0.1875, Test Error: 0.1935
Testing Linear SVM with C = 100.0




  Train Error: 0.1061, Test Error: 0.1133

LINEAR SVM SUMMARY:
C =   0.01: Train = 0.1315, Test = 0.1386
C =   0.10: Train = 0.1308, Test = 0.1345
C =   1.00: Train = 0.1481, Test = 0.1550
C =  10.00: Train = 0.1875, Test = 0.1935
C = 100.00: Train = 0.1061, Test = 0.1133

🎯 Best C value: 100.0 (Test Error: 0.1133)




In [8]:
# 2. KERNEL SVM EXPERIMENTS
# Fit a degree-2 polynomial SVC and an RBF SVC at C = 1.0, then compare both
# with the linear result recorded for the same C in linear_results.
print("\n2. KERNEL SVM EXPERIMENTS:")
print("-" * 40)

# Degree-2 polynomial kernel with coef0 = 1.0
print("Testing Quadratic Kernel SVM with C = 1.0")
quad_clf = svm.SVC(kernel='poly', degree=2, C=1.0, coef0=1.0)
(quad_train_preds, quad_test_preds,
 quad_train_error, quad_test_error) = fit_svm_classifier(quad_clf)
print(f"  Train Error: {quad_train_error:.4f}, Test Error: {quad_test_error:.4f}")

# RBF kernel with gamma='scale', as a second non-linear reference point
print("\nTesting RBF Kernel SVM with C = 1.0")
rbf_clf = svm.SVC(kernel='rbf', C=1.0, gamma='scale')
(rbf_train_preds, rbf_test_preds,
 rbf_train_error, rbf_test_error) = fit_svm_classifier(rbf_clf)
print(f"  Train Error: {rbf_train_error:.4f}, Test Error: {rbf_test_error:.4f}")

# Linear record at C = 1.0 (raises IndexError if that C was never run)
linear_c1_result = [entry for entry in linear_results if entry['C'] == 1.0][0]

print(f"\n{'='*60}")
print("KERNEL COMPARISON AT C = 1.0:")
print(f"Linear SVM    - Train: {linear_c1_result['train_error']:.4f}, Test: {linear_c1_result['test_error']:.4f}")
print(f"Quadratic SVM - Train: {quad_train_error:.4f}, Test: {quad_test_error:.4f}")
print(f"RBF SVM       - Train: {rbf_train_error:.4f}, Test: {rbf_test_error:.4f}")

# (name, test error) pairs; the pair with the smallest error wins
kernel_names = ('Linear', 'Quadratic', 'RBF')
kernel_test_errors = (linear_c1_result['test_error'], quad_test_error, rbf_test_error)
results_c1 = list(zip(kernel_names, kernel_test_errors))
best_kernel = min(results_c1, key=lambda pair: pair[1])
print(f"\n🏆 Best kernel: {best_kernel[0]} (Test Error: {best_kernel[1]:.4f})")


2. KERNEL SVM EXPERIMENTS:
----------------------------------------
Testing Quadratic Kernel SVM with C = 1.0
  Train Error: 0.0110, Test Error: 0.0234

Testing RBF Kernel SVM with C = 1.0
  Train Error: 0.0101, Test Error: 0.0208

KERNEL COMPARISON AT C = 1.0:
Linear SVM    - Train: 0.1481, Test: 0.1550
Quadratic SVM - Train: 0.0110, Test: 0.0234
RBF SVM       - Train: 0.0101, Test: 0.0208

🏆 Best kernel: RBF (Test Error: 0.0208)


In [9]:
# Collect the headline numbers from the cells above in a single dict:
# the full list of linear-SVM records plus the two kernel-SVM error pairs.
results_summary = dict(
    linear_results=linear_results,
    quad_train_error=quad_train_error,
    quad_test_error=quad_test_error,
    rbf_train_error=rbf_train_error,
    rbf_test_error=rbf_test_error,
)

print("✅ All experiments completed! Results stored in 'results_summary'")

✅ All experiments completed! Results stored in 'results_summary'
