In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.metrics import f1_score,accuracy_score,recall_score

In [2]:
def load_Data(filename):
    fr = open(filename,'r')
    x,y = [],[]
    for line in fr.readlines():
        curline = line.strip().split(',')
        x.append([int(num) / 255 for num in curline[1:]])
        y.append(1 if int(curline[0]) <= 4 else -1)
        if len(x) == 1000:
            break
    return x,y 

In [3]:
def percetron(x_train,y_train):
    data_mat = np.mat(x_train,dtype = np.float32)
    label_mat = np.mat(y_train).T

    len_data_mat = data_mat.shape[0]
    
    b = 0
    lr = 1e-4
    alpha = np.zeros((len_data_mat,))
    G_matrix = np.dot(data_mat,data_mat.T)

    print("start to train...")
    for epoch in range(30):
        start_train = time.time()
        for i in range(len_data_mat):
            x_i = data_mat[i]
            y_i = label_mat[i]
            sum = 0
            
            for j in range(len_data_mat):
                sum += alpha[j] * label_mat[j] * G_matrix[i,j]
                
            if (sum + b) * y_i <= 0:
                alpha[i] += lr
                b += y_i*lr
                
        print('Time of epoch {} consume:{:.2f} seconds:'.format(epoch + 1,time.time() - start_train))
    return G_matrix,alpha,b

In [4]:
def test(x,y,alpha,b,G_matrix):
    data_mat = np.mat(x,dtype = np.float32)
    label_mat = np.mat(y).T
    
    len_data_mat = len(data_mat)
    correct = 0
    
    for i in range(len_data_mat):

        x_i = data_mat[i]
        y_i = label_mat[i]
        
        sum = 0
        for j in range(len_data_mat):
            sum += alpha[j] * label_mat[j] * G_matrix[i,j]
        if y_i * (sum + b) > 0:
            correct += 1

    return correct/len_data_mat

In [5]:
x_train,y_train = load_Data('../input/mnist_train.csv')
x_val,y_val = load_Data('../input/mnist_test.csv')
print("x_train_length:",len(x_train),"x_val_length:",len(x_val))
print("y_train_length:",len(y_train),"y_val_length:",len(y_val))

x_train_length: 1000 x_val_length: 1000
y_train_length: 1000 y_val_length: 1000


In [6]:
G,alpha,b = percetron(x_train,y_train)

start to train...
Time of epoch 1 consume:28.95 seconds:
Time of epoch 2 consume:34.59 seconds:
Time of epoch 3 consume:40.94 seconds:
Time of epoch 4 consume:35.44 seconds:
Time of epoch 5 consume:32.33 seconds:
Time of epoch 6 consume:35.21 seconds:
Time of epoch 7 consume:30.22 seconds:
Time of epoch 8 consume:33.74 seconds:
Time of epoch 9 consume:35.79 seconds:
Time of epoch 10 consume:38.48 seconds:
Time of epoch 11 consume:38.52 seconds:
Time of epoch 12 consume:25.00 seconds:
Time of epoch 13 consume:36.96 seconds:
Time of epoch 14 consume:36.89 seconds:
Time of epoch 15 consume:33.89 seconds:
Time of epoch 16 consume:31.02 seconds:
Time of epoch 17 consume:30.68 seconds:
Time of epoch 18 consume:33.16 seconds:
Time of epoch 19 consume:32.06 seconds:
Time of epoch 20 consume:35.43 seconds:
Time of epoch 21 consume:37.00 seconds:
Time of epoch 22 consume:30.05 seconds:
Time of epoch 23 consume:28.02 seconds:
Time of epoch 24 consume:29.73 seconds:
Time of epoch 25 consume:32.18 

In [7]:
acc_train = test(x_train,y_train,alpha,b,G)
print("accuracy:",acc_train)

accuracy: 0.923


In [8]:
acc_val = test(x_val,y_val,alpha,b,G)
print("accuracy:",acc_val)

accuracy: 0.544
