In [1]:
%matplotlib inline
import matplotlib.pyplot as plt 
import gzip, os
import numpy as np
from urllib.request import urlretrieve
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

In [2]:
# Function that downloads a specified MNIST data file from Yann LeCun's website
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    """Fetch ``source + filename`` and save it as ``filename`` in the working directory.

    The payload is first written to a temporary ``<filename>.part`` file and
    only renamed to ``filename`` once the transfer has completed. The loaders
    in this notebook treat the mere existence of ``filename`` as "already
    downloaded", so a half-written file from an interrupted transfer must
    never be left under the final name.

    Parameters
    ----------
    filename : str
        Name of the remote file; also used as the local destination path.
    source : str
        Base URL that ``filename`` is appended to.

    Raises
    ------
    Any exception raised by ``urlretrieve`` or ``os.replace`` is propagated
    after the temporary file has been removed.
    """
    print("Downloading %s" % filename)
    partial_path = filename + '.part'
    try:
        urlretrieve(source + filename, partial_path)
        # os.replace is atomic on the same filesystem, so `filename` is either
        # absent or complete -- never truncated.
        os.replace(partial_path, filename)
    except BaseException:
        # BaseException so that interrupting the cell (KeyboardInterrupt)
        # also cleans up the partial file.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

# Invokes download() if necessary, then reads in images
def load_mnist_images(filename):
    """Read a gzipped MNIST image file and return its pixels as rows of 784 bytes.

    If ``filename`` does not exist on disk it is fetched with ``download()``
    first. The first 16 bytes of the decompressed stream are skipped
    (presumably the IDX header -- confirm against the format spec) and the
    remainder is viewed as ``uint8`` values reshaped to ``(-1, 784)``.
    """
    header_bytes = 16        # bytes skipped before the pixel payload
    pixels_per_image = 784   # length of each returned row

    already_on_disk = os.path.exists(filename)
    if not already_on_disk:
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw = archive.read()

    pixels = np.frombuffer(raw, np.uint8, offset=header_bytes)
    return pixels.reshape(-1, pixels_per_image)

def load_mnist_labels(filename):
    """Read a gzipped MNIST label file and return its labels as a flat ``uint8`` array.

    If ``filename`` does not exist on disk it is fetched with ``download()``
    first. The first 8 bytes of the decompressed stream are skipped
    (presumably the IDX header -- confirm against the format spec); every
    remaining byte becomes one element of the result.
    """
    header_bytes = 8  # bytes skipped before the label payload

    already_on_disk = os.path.exists(filename)
    if not already_on_disk:
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw = archive.read()

    return np.frombuffer(raw, np.uint8, offset=header_bytes)

In [7]:
def evaluate_SVM(svm, data, labels):
    """Return the fraction of samples in ``data`` that ``svm`` misclassifies.

    Parameters
    ----------
    svm : object exposing ``predict(data)``
        The fitted classifier to score.
    data : passed unchanged to ``svm.predict``.
    labels : sequence of true labels, compared element-wise with the predictions.

    Returns
    -------
    Number of mismatching predictions divided by ``len(labels)``.
    """
    predicted = svm.predict(data)
    is_wrong = predicted != labels
    n_wrong = np.sum(is_wrong)
    n_total = len(labels)
    return n_wrong / n_total

def learn_and_evaluate_SVM(kernel_type='linear', C_value=1.0):
    """Train an SVM on the MNIST training set and print its test and training error.

    Parameters
    ----------
    kernel_type : str
        ``'linear'`` fits ``LinearSVC(loss='hinge')``; ``'quadratic'`` fits
        ``SVC(kernel='poly', degree=2)``.
    C_value : float
        Passed as ``C`` to the chosen classifier.

    Raises
    ------
    ValueError
        If ``kernel_type`` is not one of the supported names.
    """
    # Reject unsupported kernels before any download or file read. Without
    # this check an unknown name left `clf` unbound and only failed at
    # `clf.fit`, after all four data files had already been loaded.
    supported_kernels = ('linear', 'quadratic')
    if kernel_type not in supported_kernels:
        raise ValueError(
            "Unsupported kernel_type %r; expected one of %r"
            % (kernel_type, supported_kernels)
        )

    ## Load the training set
    train_data = load_mnist_images('train-images-idx3-ubyte.gz')
    train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')

    ## Load the testing set
    test_data = load_mnist_images('t10k-images-idx3-ubyte.gz')
    test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    if kernel_type == 'linear':
        clf = LinearSVC(loss='hinge', C=C_value)
    else:  # 'quadratic' -- the only other value that passes validation
        clf = SVC(kernel='poly', degree=2, C=C_value)

    clf.fit(train_data, train_labels)

    print('Test Error: ', evaluate_SVM(clf, test_data, test_labels))
    print('Training Error: ', evaluate_SVM(clf, train_data, train_labels))
    

In [None]:
# Fit the linear-kernel SVM with C = 1.0 and print its test and training error.
learn_and_evaluate_SVM(
    kernel_type='linear',
    C_value=1.0,
)