In [None]:
from datasets.mnist import read_data

# load the data
chose_split = True  # True: load everything and choose our own train/test split
if not chose_split:
    # use the training and testing sets exactly as given by the author
    x_train, y_train = read_data.load_train_data()
    x_test, y_test = read_data.load_test_data()
else:
    from sklearn.model_selection import train_test_split

    X, y = read_data.load_all_data()
    # hold out 40% of the samples for testing; fixed seed keeps the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=1337
    )
    del X, y  # the full arrays are no longer needed, free the memory

# flatten every image: keep the sample axis, collapse the rest into one axis
x_train, x_test = (
    images.reshape((images.shape[0], -1)) for images in (x_train, x_test)
)

# quick-run switch: keep only a small slice of each split so the cells below
# finish fast; set to False to work with the complete split
test = True
if test:
    n_train, n_test = 10000, 1000
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

# Preprocessing: standardise -> PCA -> standardise again.
# The 0.9 given to PCA is its `n_components` argument (a float in (0, 1) asks
# PCA to keep as many components as needed to reach that explained-variance ratio).
preprocessing_steps = [
    ('scaler1', StandardScaler()),
    ('PCA', PCA(n_components=0.9)),
    ('scaler2', StandardScaler()),
]
pipe = Pipeline(steps=preprocessing_steps)

# fit on the training data only, then apply the same transform to both splits
pipe.fit(x_train)
x_train, x_test = pipe.transform(x_train), pipe.transform(x_test)

In [None]:
from sklearn.svm import SVC
from sklearn import metrics
from time import time

class SVM_wrapper(SVC):
    """``SVC`` subclass that records its training time and evaluation metrics.

    Attributes added on top of ``SVC``:
        init_kwargs: dict of the extra keyword arguments given to ``__init__``.
        train_time: wall-clock seconds spent in the most recent ``fit`` call.
        train_metrics: ``(accuracy, precision, recall, f1)`` on the training
            data, set by ``calculate_train_metrics``.
        test_metrics: same tuple on the test data, set by
            ``calculate_test_metrics``.
    """

    def __init__(self, kernel, **kwargs):
        """Create the classifier.

        Args:
            kernel: kernel name forwarded to ``SVC``.
            **kwargs: any further ``SVC`` keyword arguments (e.g. ``C``,
                ``degree``, ``coef0``); also remembered in ``init_kwargs``.
        """
        super().__init__(kernel=kernel, **kwargs)
        self.init_kwargs = kwargs

    def fit(self, *args, **kwargs):
        """Fit the underlying ``SVC`` and store the elapsed time in ``train_time``.

        Returns:
            self, as the scikit-learn estimator API expects, so that calls
            such as ``model.fit(X, y).predict(X)`` keep working.
        """
        t0 = time()
        super().fit(*args, **kwargs)
        self.train_time = time() - t0
        return self

    def calculate_metrics(self, x, y):
        """Predict on ``x`` and score the predictions against ``y``.

        Returns:
            Tuple ``(accuracy, precision, recall, f1)``; precision, recall
            and F1 are macro-averaged over the classes.
        """
        y_predicted = self.predict(x)
        acc = metrics.accuracy_score(y, y_predicted)
        prec = metrics.precision_score(y, y_predicted, average="macro")
        recall = metrics.recall_score(y, y_predicted, average="macro")
        f1 = metrics.f1_score(y, y_predicted, average="macro")
        return (acc, prec, recall, f1)

    def calculate_train_test_metrics(self, x_train, y_train, x_test, y_test):
        """Fill in both ``test_metrics`` and ``train_metrics``."""
        self.calculate_test_metrics(x_test, y_test)
        self.calculate_train_metrics(x_train, y_train)

    def calculate_train_metrics(self, x_train, y_train):
        """Store the metrics tuple for the training data in ``train_metrics``."""
        self.train_metrics = self.calculate_metrics(x_train, y_train)

    def calculate_test_metrics(self, x_test, y_test):
        """Store the metrics tuple for the test data in ``test_metrics``."""
        self.test_metrics = self.calculate_metrics(x_test, y_test)

# Hyper-parameter grid per kernel: {kernel: {argument name: candidate values}}.
# A kernel mapped to None (instead of a dict) gets a single model built with
# the default hyper-parameters.
c_values = (0.1, 1, 10)
params = {
    'linear': {'C': c_values},
    'poly': {'C': c_values, 'degree': (2, 3, 4)},
    # author's note: 'gamma': ('scale', 'auto') could be added here, but both
    # options are expected to give the same value
    'rbf': {'C': c_values},
    'sigmoid': {'C': c_values, 'coef0': (0, 1, 10)},
}

from itertools import product

# One SVM_wrapper per kernel and per combination of its candidate values.
models = []
for kernel, grid in params.items():
    if grid is None:
        models.append(SVM_wrapper(kernel))
        continue

    arg_names = list(grid)
    # Cartesian product of the candidate values, in the order the arguments are listed
    for combo in product(*grid.values()):
        models.append(SVM_wrapper(kernel, **dict(zip(arg_names, combo))))

In [None]:
# Train every model in turn and report how long each fit took.
for model in models:
    model.fit(x_train, y_train)
    mins, secs = divmod(model.train_time, 60)
    # divmod on a float returns a float minute count, so format it without
    # decimals; the kernel and its arguments identify which model the line is for
    print(f'{model.kernel} {model.init_kwargs}: {mins:.0f} mins {secs:.4f} secs')

In [None]:
# Disabled experiment, kept for reference: fit the models from a thread pool.
# (Written as comments rather than a string literal so the cell shows no output.)
#
# from concurrent.futures import ThreadPoolExecutor
# with ThreadPoolExecutor(max_workers = 2) as executor:
#     executor.map(lambda x: x.fit(x_train, y_train), models)
#
# for model in models:
#     # model.fit(x_train, y_train)
#     mins, secs = divmod(model.train_time, 60)
#     print(f'{mins} mins {secs:.4f} secs')

In [None]:
# Score every trained model on both splits and print one summary line each:
# kernel, then the (accuracy, precision, recall, f1) tuples for train and test.
eval_sets = (x_train, y_train, x_test, y_test)
for svm in models:
    svm.calculate_train_test_metrics(*eval_sets)
    summary = (svm.kernel, svm.train_metrics, svm.test_metrics)
    print(*summary)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix for the model with the highest test accuracy.
# The predictions are computed here because `pred` was not defined anywhere.
# Requires the metrics cell to have run, so every model has `test_metrics`
# (a tuple whose first entry is the accuracy).
best_model = max(models, key=lambda m: m.test_metrics[0])
pred = best_model.predict(x_test)

# use the classes the model was trained on for both the matrix and the ticks,
# so rows/columns and their labels always line up
class_labels = list(best_model.classes_)

fig, ax = plt.subplots(figsize=(15, 12))
sns.heatmap(confusion_matrix(y_test, pred, labels=class_labels),
            annot=True, cmap='Purples', fmt='g',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)

ax.set_title(f'Confusion Matrix ({best_model.kernel} {best_model.init_kwargs})')
ax.set_xlabel('Predictions')
ax.set_ylabel('Actual Values')

plt.show()