In [1]:
import os
import time

import struct
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Prepare the training set and testing set

In [2]:
def load_data(path, kind=''):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte'
                               % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',
                                 lbpath.read(8))
        labels = np.fromfile(lbpath,
                             dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII',
                                               imgpath.read(16))
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(len(labels), 784)

    return images, labels

path = './datasets/'

In [3]:
# get mnist and emnist data
X_mnist_train,y_mnist_train = load_data(path, kind='train')
X_mnist_test,y_mnist_test = load_data(path, kind='t10k')

In [4]:
noise_random_60 = np.random.rand(60000,784)
noise_labels_60 = np.full((60000), 10, dtype=int)

noise_random_6 = np.random.rand(6000,784)
noise_labels_6 = np.full((6000), 10, dtype=int)

In [5]:
X_random_train_60 = np.concatenate((X_mnist_train, noise_random_60), axis = 0 )
y_random_train_60 = np.concatenate((y_mnist_train, noise_labels_60), axis = 0 )

X_random_train_6 = np.concatenate((X_mnist_train, noise_random_6), axis = 0 )
y_random_train_6 = np.concatenate((y_mnist_train, noise_labels_6), axis = 0 )

In [6]:
# Random shuffle the training and testing datasets
from scipy.sparse import coo_matrix
from sklearn.utils import shuffle

X_sparse_mnist_train = coo_matrix(X_mnist_train)
X_mnist_train, X_sparse_mnist_train, y_mnist_train = shuffle(X_mnist_train, X_sparse_mnist_train, y_mnist_train, random_state=666)

X_sparse_random_train_60 = coo_matrix(X_random_train_60)
X_random_train_60, X_sparse_random_train_60, y_random_train_60 = shuffle(X_random_train_60, X_sparse_random_train_60, y_random_train_60, random_state=666)

X_sparse_random_train_6 = coo_matrix(X_random_train_6)
X_random_train_6, X_sparse_random_train_6, y_random_train_6 = shuffle(X_random_train_6, X_sparse_random_train_6, y_random_train_6, random_state=666)

In [7]:
X_mnist_train = X_mnist_train/255
X_mnist_test = X_mnist_test/255

X_random_train_60 = X_random_train_60/255
X_random_train_6  = X_random_train_6/255

# Training

In [8]:
#from sklearn.pipeline import Pipeline 
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.metrics import accuracy_score

from sklearn import svm

In [9]:
C = 0.1
hs_mnist_svm_scores = []
hs_random_60_svm_scores= []
hs_random_6_svm_scores= []

In [10]:
for i in range(3):
    svm_clf_mnist = svm.SVC(kernel='linear', C = C)
    
    svm_clf_mnist.fit(X_mnist_train, y_mnist_train)
    
    y_pred_mnist = svm_clf_mnist.predict(X_mnist_test)
    hs_no_score = accuracy_score(y_mnist_test, y_pred_mnist)
    
    hs_mnist_svm_scores.append(hs_no_score)
    
    print("------------------------------------------------------------")
    print(i)
    print("hs_no_score = %f" % hs_no_score)
    print("-------------------------------------------------------------")
    print()

------------------------------------------------------------
0
hs_no_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
1
hs_no_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
2
hs_no_score = 0.947200
-------------------------------------------------------------



In [12]:
for j in range(3):
    svm_clf_random_6 = svm.SVC(kernel='linear', C = C)
    
    svm_clf_random_6.fit(X_random_train_6, y_random_train_6)
    
    y_pred_random_6 = svm_clf_random_6.predict(X_mnist_test)
    hs_random_score_6 = accuracy_score(y_mnist_test, y_pred_random_6)
    print("------------------------------------------------------------")
    print(j)
    print("hs_random_score = %f" % hs_random_score_6)
    print("-------------------------------------------------------------")
    print()
    hs_random_6_svm_scores.append(hs_random_score_6)
    

------------------------------------------------------------
0
hs_random_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
1
hs_random_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
2
hs_random_score = 0.947200
-------------------------------------------------------------



In [13]:
for h in range(3):
    svm_clf_random_60 = svm.SVC(kernel='linear', C = C)
    
    svm_clf_random_60.fit(X_random_train_60, y_random_train_60)
    
    y_pred_random_60 = svm_clf_random_60.predict(X_mnist_test)
    hs_random_score_60 = accuracy_score(y_mnist_test, y_pred_random_60)
    print("------------------------------------------------------------")
    print(j)
    print("hs_random_score = %f" % hs_random_score_60)
    print("-------------------------------------------------------------")
    print()
    hs_random_60_svm_scores.append(hs_random_score_60)
    

------------------------------------------------------------
2
hs_random_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
2
hs_random_score = 0.947200
-------------------------------------------------------------

------------------------------------------------------------
2
hs_random_score = 0.947200
-------------------------------------------------------------



In [14]:
hs_mnist_svm_scores

[0.9472, 0.9472, 0.9472]

In [15]:
hs_random_6_svm_scores

[0.9472, 0.9472, 0.9472]

In [16]:
hs_random_60_svm_scores

[0.9472, 0.9472, 0.9472]