In [None]:
# Importing some needed packages

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Notebook-wide plotting defaults: figure size, nearest-neighbour image
# interpolation and a grayscale colormap for images
plt.rcParams.update({
    'figure.figsize': (12.0, 6.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

In [None]:
# Read the training and test sets (in that order) as NumPy arrays.
# Both files are gzip-compressed CSVs that are parsed without a header row.
from pandas import read_csv

train_data, test_data = (
    read_csv(csv_path, compression='gzip', header=None).to_numpy()
    for csv_path in (
        '../Datasets/mnist_train.csv.gz',
        '../Datasets/mnist_test.csv.gz',
    )
)

In [None]:
# Split into features and labels.
# Column 0 of each array is used as the label; every remaining column is a
# feature (presumably pixel intensities, given the mnist_* file names).
# The feature slice has to be `1:` rather than `1`: a bare `1` would pick a
# single column and return a 1-D vector, whereas the estimators fitted below
# need a 2-D array with one row per sample and one column per feature.

X_train = train_data[:, 1:]
y_train = train_data[:, 0]
X_test = test_data[:, 1:]
y_test = test_data[:, 0]

# Sanity check: both splits must expose the same number of feature columns,
# otherwise a model fitted on X_train cannot predict on X_test.
assert X_train.shape[1] == X_test.shape[1], "train/test feature counts differ"

In [None]:
# Standardise the features with StandardScaler, then train the SVM on the
# scaled training data; both steps live in one pipeline so the same scaling
# is applied again at prediction time.
# NOTE(review): SVC receives no kernel argument, so the library default is
# used (RBF in current scikit-learn releases -- confirm for the installed
# version). cache_size is passed straight through to SVC.

clf = make_pipeline(StandardScaler(), SVC(cache_size=4000)).fit(X_train, y_train)

In [5]:
from sklearn.metrics import accuracy_score, confusion_matrix

def eval_clf(y_pred_train, y_train, y_pred_test, y_test):
    """Score a classifier's predictions on the training and test splits.

    Args:
        y_pred_train: Predicted labels for the training samples.
        y_train: Reference labels for the training samples.
        y_pred_test: Predicted labels for the test samples.
        y_test: Reference labels for the test samples.

    Returns:
        A 3-tuple ``(train accuracy, test accuracy, test confusion matrix)``.
        Each value comes from the matching ``sklearn.metrics`` function,
        called with the reference labels first and the predictions second.
    """
    return (
        accuracy_score(y_train, y_pred_train),
        accuracy_score(y_test, y_pred_test),
        confusion_matrix(y_test, y_pred_test),
    )

In [None]:
# Predict once per split with the fitted pipeline, then compute the metrics.
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
acc_train, acc_test, cm_test = eval_clf(y_pred_train, y_train, y_pred_test, y_test)

# The classifier is built as SVC(cache_size=4000) with no kernel argument, so
# it is not a linear SVM: scikit-learn's default kernel (RBF) applies. The
# labels below say so explicitly instead of the former "Linear SVM".
print("SVM (RBF kernel) accuracy train: {}".format(acc_train))
print("SVM (RBF kernel) accuracy test: {}".format(acc_test))
print("Confusion matrix:\n{}".format(cm_test))

In [None]:
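# Reference output, apparently recorded from an earlier run of the evaluation
# cell above and kept verbatim for comparison.
# NOTE: the "Linear SVM" label is misleading -- the classifier is created as
# SVC(cache_size=4000) with no kernel argument, so scikit-learn's default
# (RBF) kernel is what produced these numbers.
# Linear SVM accuracy train: 0.9899166666666667
# Linear SVM accuracy test: 0.9792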
# Confusion matrix:
# [[ 973    0    1    0    0    2    1    1    2    0]
#  [   0 1126    3    1    0    1    1    1    2    0]
#  [   6    1 1006    2    1    0    2    7    6    1]
#  [   0    0    2  995    0    2    0    5    5    1]
#  [   0    0    5    0  961    0    3    0    2   11]
#  [   2    0    0    9    0  871    4    1    4    1]
#  [   6    2    0    0    2    3  944    0    1    0]
#  [   0    6   11    1    1    0    0  996    2   11]
#  [   3    0    2    6    3    2    2    3  950    3]
#  [   3    4    1    7   10    2    1    7    4  970]]