In [89]:
import os
import numpy as np
import struct
import time

In [132]:
def load_mnist(path, kind='train'):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               'MNIST_%s-labels-idx1-ubyte'
                               % kind)
    images_path = os.path.join(path,
                               'MNIST_%s-images-idx3-ubyte'
                               % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',
                                 lbpath.read(8))
        labels = np.fromfile(lbpath,
                             dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII',
                                               imgpath.read(16))
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(len(labels), 784)
    X = images[labels<=1,:]
    Y = []
    labels = labels[labels<=1]
    for y in labels:
        if y==0:
            Y.append(-1)
        elif y==1:
            Y.append(1)
            

    return X, np.array(Y)

In [133]:
train_images, train_labels = load_mnist('../data')
test_images, test_label = load_mnist('../data','t10k')

In [151]:
def perceptron(data, label, iters=50):
    """
    感知机训练过程。
    data:
    label:
    iter:
    """
    print('start to train...')
    data = np.mat(data) #转换之后每个样本都是横向向量
    label = np.mat(label).T
    n,m = data.shape #m:单个样本维度，n:样本数量
    w = np.zeros((1,m))
    b = 0
    #初始步长，也就是梯度下降速率
    h = 0.0001
    
    for k in range(iters):
        # sgd
        for i in range(n):
            xi = data[i]
            yi = label[i]
            if (-1*yi*(w*xi.T + b)) >=0:
                w = w + h*yi*xi
                b = b + h*yi
        print('Round %d:%d training'%(k,iters))
    return w,b
    

In [152]:
def model_test(data, label, w, b):
    print('start to test')
    data = np.mat(data)
    label = np.mat(label).T
    n,m = data.shape
    errorCnt = 0
    for i in range(n):
        xi = data[i]
        yi = label[i]
        result = -1*yi*(w*xi.T + b)
        if result >=0: errorCnt+1
        
    acc = 1-(errorCnt/n)
    return acc

In [153]:
# train_y = train_labels[train_labels<=1]
# train_X = train_images[train_labels<=1,:]
# test_y = test_label[test_label<=1]
# test_X = test_images[test_label<=1,:]

In [154]:
start = time.time()
w,b = perceptron(train_images, train_labels, iters=10)
acc = model_test(test_images, test_label, w, b)
end = time.time()
print('Acc: ',acc)
print('time span:', end-start)

start to train...
Round 0:10 training
Round 1:10 training
Round 2:10 training
Round 3:10 training
Round 4:10 training
Round 5:10 training
Round 6:10 training
Round 7:10 training
Round 8:10 training
Round 9:10 training
start to test
Acc:  1.0
time span: 3.1431360244750977
