In [7]:
# Download the MNIST dataset, parse the IDX files and cache them as mnist.npz
import gzip
import os
import sys
import struct
import numpy as np
import time

def read_image(fi):
    """Decode an IDX image stream into a float32 array scaled to [0, 1].

    Args:
        fi: Binary file-like object positioned at the start of the 16-byte
            big-endian header (magic, image count, rows, columns).

    Returns:
        float32 array of shape (n, 28, 28); each pixel byte divided by 255.

    Raises:
        AssertionError: if the magic number is not 0x803, the images are not
            28x28, or the payload length does not match the header.
    """
    header = fi.read(16)
    magic, n, rows, columns = struct.unpack(">IIII", header)
    assert magic == 0x00000803
    assert (rows, columns) == (28, 28)

    # Everything after the header is one unsigned byte per pixel.
    pixel_count = n * rows * columns
    payload = fi.read()
    assert len(payload) == pixel_count

    pixels = np.frombuffer(payload, dtype='>u1', count=pixel_count)
    images = pixels.reshape(n, rows, columns)
    return images.astype(np.float32) / 255.0

def read_label(fi):
    """Decode an IDX label stream into an array of unsigned bytes.

    Args:
        fi: Binary file-like object positioned at the start of the 8-byte
            big-endian header (magic, label count).

    Returns:
        1-D uint8 array holding one label per item.

    Raises:
        AssertionError: if the magic number is not 0x801 or the payload
            length does not match the count in the header.
    """
    header = fi.read(8)
    magic, n = struct.unpack(">II", header)
    assert magic == 0x00000801

    # The remainder of the stream is exactly one byte per label.
    payload = fi.read()
    assert len(payload) == n

    labels = np.frombuffer(payload, dtype='>u1', count=n)
    return labels

if __name__ == '__main__':
    # Build the cache only once; the archive is written as ./mnist.npz.
    if not os.path.exists('./mnist.npz'):
        base_url = 'http://yann.lecun.com/exdb/mnist/'
        # Key in the .npz archive -> gzip file it is parsed from.
        archives = {
            'train_x': 'train-images-idx3-ubyte.gz',
            'train_y': 'train-labels-idx1-ubyte.gz',
            'test_x': 't10k-images-idx3-ubyte.gz',
            'test_y': 't10k-labels-idx1-ubyte.gz',
        }

        for filename in archives.values():
            os.system('wget -N ' + base_url + filename)
            # os.system does not raise on failure, so verify the file is
            # really there instead of failing later with a less clear error.
            if not os.path.exists(filename):
                raise FileNotFoundError(
                    'download failed, {} is missing'.format(filename))

        arrays = {}
        for key, filename in archives.items():
            # '*_x' entries are image files, '*_y' entries are label files.
            parse = read_image if key.endswith('_x') else read_label
            # Context manager guarantees the gzip handle is closed.
            with gzip.open(filename, 'rb') as fi:
                arrays[key] = parse(fi)

        np.savez_compressed('mnist', **arrays)

        # Delete only the files fetched above; a shell 'rm *.gz' would also
        # wipe unrelated archives that happen to be in the working directory.
        for filename in archives.values():
            os.remove(filename)

In [8]:
# load dataset
# Open the cached archive; each image is flattened from 28x28 to a 784-vector.
data = np.load('mnist.npz')

label_train, label_test = data['train_y'], data['test_y']
data_train, data_test = (
    data[split].reshape(-1, 28*28) for split in ('train_x', 'test_x')
)

In [9]:
# SVM
from sklearn import datasets, model_selection, svm, metrics

# Seed the estimator so repeated runs produce the same model, in line with
# the seeded RandomForestClassifier used elsewhere in this notebook.
clf = svm.LinearSVC(random_state=0)

# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted mid-run.
t = time.perf_counter()
clf.fit(data_train, label_train)
print("Finish training in {}s.".format(time.perf_counter() - t))

# Score on the held-out test split.
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print('Test accuracy is {:.2%}'.format(ac_score))

# Rows are true labels, columns are predicted labels.
co_mat = metrics.confusion_matrix(label_test, pre)
print("Confusion matrix is shown below.")
print(co_mat)

Finish training in 105.47698211669922s.
Test accuracy is 91.84%
Confusion matrix is shown below.
[[ 961    0    2    1    1    4    6    3    1    1]
 [   0 1112    3    2    0    1    5    1   11    0]
 [  11   11  914   18   10    4   13   12   36    3]
 [   4    0   19  917    2   23    5   12   19    9]
 [   1    4    6    3  913    0    9    3    5   38]
 [   8    2    0   38   12  771   17    7   29    8]
 [   7    4    7    2    5   21  909    1    2    0]
 [   2    8   23    5    7    1    1  947    4   30]
 [  11   13    8   19   14   31    8   13  844   13]
 [   7    8    2   15   31   12    0   26   12  896]]


In [10]:
# K-Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=3)

# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted mid-run.
# Only fit() is timed here; predict() below is not included in the figure.
t = time.perf_counter()
clf.fit(data_train, label_train)
print("Finish training in {}s.".format(time.perf_counter() - t))

# Score on the held-out test split.
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print('Test accuracy is {:.2%}'.format(ac_score))

# Rows are true labels, columns are predicted labels.
co_mat = metrics.confusion_matrix(label_test, pre)
print("Confusion matrix is shown below.")
print(co_mat)

Finish training in 39.710243940353394s.
Test accuracy is 97.05%
Confusion matrix is shown below.
[[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  10    9  996    2    0    0    0   13    2    0]
 [   0    2    4  976    1   13    1    7    3    3]
 [   1    6    0    0  950    0    4    2    0   19]
 [   6    1    0   11    2  859    5    1    3    4]
 [   5    3    0    0    3    3  944    0    0    0]
 [   0   21    5    0    1    0    0  991    0   10]
 [   8    2    4   16    8   11    3    4  914    4]
 [   4    5    2    8    9    2    1    8    2  968]]


In [11]:
# Random-Forest
from sklearn.ensemble import RandomForestClassifier

# random_state pins the forest's randomness so the run is repeatable.
clf = RandomForestClassifier(random_state=0)

# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted mid-run.
t = time.perf_counter()
clf.fit(data_train, label_train)
print("Finish training in {}s.".format(time.perf_counter() - t))

# Score on the held-out test split.
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print('Test accuracy is {:.2%}'.format(ac_score))

# Rows are true labels, columns are predicted labels.
co_mat = metrics.confusion_matrix(label_test, pre)
print("Confusion matrix is shown below.")
print(co_mat)

Finish training in 4.271898031234741s.
Test accuracy is 94.68%
Confusion matrix is shown below.
[[ 969    2    1    0    0    0    4    1    2    1]
 [   0 1120    4    4    1    1    2    0    3    0]
 [   9    1  980    5    3    1    6   11   14    2]
 [   1    1   12  955    0   16    1    7   12    5]
 [   2    3    2    1  936    0    6    3    8   21]
 [   7    2    2   34    8  812   10    2    5   10]
 [  16    3    3    1    4   12  917    0    2    0]
 [   4    8   27    1    6    0    0  966    4   12]
 [   6    1   13   22    6   16    7    9  886    8]
 [   9    4    6   11   29    8    1    7    7  927]]
