In [None]:
import numpy as np
import os

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

import joblib

In [None]:
# Relative path of the directory that holds the unzipped MNIST IDX files.
DATA_PATH = '../data'
# Length of one flattened image vector: 28 rows x 28 columns of pixels.
IMAGE_SIZE = 28 * 28

In [None]:
def _find_idx_file(path, kind, stem):
    """Return the path of an IDX file, accepting both common spellings of its name."""
    dotted = os.path.join(path, '%s-%s' % (kind, stem))
    if os.path.exists(dotted):
        return dotted
    hyphenated = os.path.join(path, '%s-%s' % (kind, stem.replace('.', '-')))
    # When neither spelling exists, hand back the dotted name so that open()
    # raises FileNotFoundError for the same path as it always did.
    return hyphenated if os.path.exists(hyphenated) else dotted


def _read_idx_header(raw, n_fields):
    """Decode the first ``n_fields`` big-endian 32-bit integers of an IDX file."""
    return [int.from_bytes(raw[start:start + 4], 'big')
            for start in range(0, 4 * n_fields, 4)]


def load_mnist(path, kind='train'):
    """
    Load one split of an MNIST-style dataset from uncompressed IDX files.

    The data is distributed as four archives:
    'train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz',
    't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'
    Before use, download them to the `path` directory and unzip them.

    For each file the dotted name (e.g. ``train-images.idx3-ubyte``) is tried
    first; if it does not exist, the hyphenated name produced by gunzipping
    the archives above (e.g. ``train-images-idx3-ubyte``) is used instead.

    Parameters
    ----------
    path : str
        Directory containing the unzipped label and image files.
    kind : str, default 'train'
        File-name prefix of the split to load, e.g. 'train' or 't10k'.

    Returns
    -------
    images : numpy.ndarray
        uint8 array of shape (n_samples, rows * cols); one flattened image per
        row. It is a read-only view of the bytes read from disk.
    labels : numpy.ndarray
        uint8 array of shape (n_samples,), also read-only.

    Raises
    ------
    FileNotFoundError
        If a label or image file is missing under both spellings.
    ValueError
        If a file does not start with the expected IDX magic number (for
        instance because it is still gzip-compressed), or if the item counts
        and payload sizes are inconsistent.
    """
    labels_path = _find_idx_file(path, kind, 'labels.idx1-ubyte')
    images_path = _find_idx_file(path, kind, 'images.idx3-ubyte')

    with open(labels_path, 'rb') as label_file:
        label_bytes = label_file.read()

    # Label header: magic number 0x00000801, then the number of labels.
    magic, n_labels = _read_idx_header(label_bytes, 2)
    if magic != 0x00000801:
        raise ValueError(
            '%s is not an IDX label file (magic number %d); '
            'is it still gzip-compressed?' % (labels_path, magic))
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)
    if len(labels) != n_labels:
        raise ValueError(
            '%s declares %d labels but contains %d'
            % (labels_path, n_labels, len(labels)))

    with open(images_path, 'rb') as image_file:
        image_bytes = image_file.read()

    # Image header: magic number 0x00000803, image count, rows, columns.
    magic, n_images, n_rows, n_cols = _read_idx_header(image_bytes, 4)
    if magic != 0x00000803:
        raise ValueError(
            '%s is not an IDX image file (magic number %d); '
            'is it still gzip-compressed?' % (images_path, magic))
    if n_images != len(labels):
        raise ValueError(
            '%s holds %d images but %s holds %d labels'
            % (images_path, n_images, labels_path, len(labels)))

    pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)
    if pixels.size != n_images * n_rows * n_cols:
        raise ValueError(
            '%s declares %d images of %dx%d pixels but contains %d pixel bytes'
            % (images_path, n_images, n_rows, n_cols, pixels.size))

    # The per-image size comes from the file header, so the reshape always
    # matches the data that was actually read.
    images = pixels.reshape(n_images, n_rows * n_cols)

    return images, labels

In [None]:
# Load the training split of MNIST: flattened images and their labels.
x_train, y_train = load_mnist(path=DATA_PATH, kind='train')
# The test split is not needed for training; kept here for reference.
# x_test, y_test = load_mnist(DATA_PATH, kind='t10k')

In [None]:
# Feature engineering: fit the scaler on the training images, then
# standardize those same images with the statistics it learned.
# The fitted scaler is kept in `transfer4` so it can be saved later.
transfer4 = StandardScaler().fit(x_train)
x_train = transfer4.transform(x_train)

In [None]:
# Optional: uncomment to keep only the first 1000 samples (e.g. for a quick trial run).
# x_train = x_train[:1000]
# y_train = y_train[:1000]

In [None]:
# SVM classifier: polynomial kernel, solver capped at 5000 iterations.
# probability=True makes scikit-learn run an internal cross-validation that
# shuffles the data at random; fixing random_state makes the fitted
# probability estimates reproducible from one run to the next.
svm_model4 = SVC(kernel='poly', max_iter=5000, gamma='scale', probability=True,
                 random_state=42)

In [None]:
# Grid search and cross validation
# Only the regularization strength C is searched; the other candidate
# parameters are left disabled below.
param_dict = {
    'C': [18, 18.5, 19, 19.5, 20],
    # 'kernel': ['linear', 'rbf', 'poly'],
    # 'gamma': ['scale', 'auto']
}
# This cell rebinds `svm_model4` from the bare classifier to the search object.
# If the cell is run a second time, `svm_model4` is already a GridSearchCV, and
# wrapping it again would nest one search inside another (the later fit would
# then fail because 'C' is not a GridSearchCV parameter). Unwrap it first so
# that the cell gives the same result however often it is executed.
svm_model4 = GridSearchCV(
    svm_model4.estimator if isinstance(svm_model4, GridSearchCV) else svm_model4,
    param_dict,
    n_jobs=-1,  # use all available CPU cores
    cv=2,       # 2-fold cross validation
)

In [None]:
# Run the grid search: fits the classifier for every candidate in the grid
# on the standardized training images and their labels.
svm_model4.fit(X=x_train, y=y_train)

In [None]:
# Summary of the hyperparameter search on the training data:
# each entry pairs a heading with the fitted-search attribute it describes.
search_report = (
    ("Best parameters: \n", svm_model4.best_params_),
    ("Best results (results in the validation set): \n", svm_model4.best_score_),
    ("Best estimator: \n", svm_model4.best_estimator_),
    ("Cross-validation results.: \n", svm_model4.cv_results_),
)
for heading, value in search_report:
    print(heading, value)

In [None]:
# joblib.dump does not create missing directories. Make sure the output
# directory exists so saving cannot fail with FileNotFoundError after the
# long training step has already finished.
os.makedirs('../models', exist_ok=True)
# Save model (the fitted grid-search object)
joblib.dump(svm_model4, '../models/svm_model4.pkl')
# Save StandardScaler (needed to preprocess new data the same way)
joblib.dump(transfer4, '../models/transfer4.pkl')