### Import data

In [1]:
import numpy as np 
import pandas as pd

# Read the headerless CSV, then discard every row that contains a missing value.
df = pd.read_csv("data.csv", header=None)
df = df.dropna()
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-1,-0.294118,0.487437,0.180328,-0.292929,-1.000000,0.001490,-0.531170,-0.033333
1,1,-0.882353,-0.145729,0.081967,-0.414141,-1.000000,-0.207153,-0.766866,-0.666667
2,-1,-0.058824,0.839196,0.049180,-1.000000,-1.000000,-0.305514,-0.492741,-0.633333
3,1,-0.882353,-0.105528,0.081967,-0.535354,-0.777778,-0.162444,-0.923997,-1.000000
4,-1,-1.000000,0.376884,-0.344262,-0.292929,-0.602837,0.284650,0.887276,-0.600000
...,...,...,...,...,...,...,...,...,...
763,1,0.176471,0.015075,0.245902,-0.030303,-0.574468,-0.019374,-0.920581,0.400000
764,1,-0.764706,0.226131,0.147541,-0.454545,-1.000000,0.096870,-0.776260,-0.800000
765,1,-0.411765,0.216080,0.180328,-0.535354,-0.735225,-0.219076,-0.857387,-0.700000
766,-1,-0.882353,0.266332,-0.016393,-1.000000,-1.000000,-0.102832,-0.768574,-0.133333


### Split data set

In [3]:
# Positional hold-out split: the leading rows train the model and the
# trailing rows are kept for testing (no shuffling is performed).
TRAIN_FRACTION = 0.8  # share of samples used for training

# Column 0 is used as the class label; every other column is a feature.
y = np.array(df[0])
x = np.array(df.drop(0, axis = 1))

# Derive the cut-off from the data size instead of hard-coding a row count,
# so the 80/20 split stays correct if data.csv changes.
n_train = int(TRAIN_FRACTION * len(y))
assert 0 < n_train < len(y), "need at least one training and one test sample"

y_train = y[:n_train]
y_test = y[n_train:]

x_train = x[:n_train]
x_test = x[n_train:]

### Perceptron Algorithm

In [73]:
class Perceptron_alg():
    """Perceptron for binary classification that predicts labels 1 or -1.

    Parameters
    ----------
    eta : float
        Learning rate that scales every parameter update.
    n_iter : int
        Maximum number of passes (epochs) over the training set.

    Attributes
    ----------
    par : numpy.ndarray
        Learned parameters, created by ``fit``. ``par[0]`` is the bias ``b``
        and ``par[1:]`` holds one weight per feature.
    prediction_list : numpy.ndarray or None
        Result of the most recent ``predict`` call; ``None`` until then.
    """

    def __init__(self, eta, n_iter):
        self.eta = eta  # learning rate
        self.n_iter = n_iter  # maximum number of iterations over the training set
        self.prediction_list = None

    def fit(self, x_train, y_train):
        """Learn the bias and weights from the training samples.

        Parameters
        ----------
        x_train : numpy.ndarray
            2-D array with one row per sample and one column per feature.
        y_train : iterable
            Target label for each row of ``x_train``.

        Returns
        -------
        Perceptron_alg
            ``self``, so calls can be chained.
        """
        # One slot for the bias followed by one weight per feature.
        self.par = np.zeros(1 + x_train.shape[1])

        for _ in range(self.n_iter):
            n_updates = 0
            for xi, target in zip(x_train, y_train):
                # Classify inline instead of calling predict() so that training
                # does not overwrite prediction_list with per-sample values.
                predicted = 1 if self.real_input(xi) >= 0.0 else -1
                update = self.eta * (target - predicted)

                self.par[1:] += update * xi  # weight
                self.par[0] += update  # b

                if update != 0:
                    n_updates += 1

            # An epoch without any update leaves the parameters unchanged, so
            # every later epoch would repeat it exactly; stop early.
            if n_updates == 0:
                break

        return self

    def real_input(self, x_test):
        """Return the linear activation ``x_test . weights + bias``."""
        return np.dot(x_test, self.par[1:]) + self.par[0]

    def predict(self, x_test):
        """Return 1 where the activation is >= 0 and -1 elsewhere.

        The result is also stored in ``prediction_list``.
        """
        self.prediction_list = np.where(self.real_input(x_test) >= 0.0, 1, -1)
        return self.prediction_list

    def score(self, x_test, y_test):
        """Return the fraction of samples whose prediction equals ``y_test``."""
        pred_y = self.predict(x_test)

        return np.mean(y_test == pred_y)

In [74]:
# Train with learning rate 0.001 and n_iter=400, then label the test split.
p = Perceptron_alg(eta=0.001, n_iter=400).fit(x_train, y_train)
p.predict(x_test)

array([ 1,  1,  1, -1, -1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1, -1,  1,
        1,  1,  1, -1,  1,  1,  1,  1,  1, -1, -1, -1,  1, -1, -1, -1,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1, -1, -1, -1,  1,  1, -1,
        1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,  1, -1,  1, -1,
        1,  1,  1,  1,  1, -1,  1, -1,  1, -1,  1,  1, -1,  1,  1, -1,  1,
        1, -1, -1,  1,  1,  1,  1, -1,  1, -1,  1, -1,  1,  1, -1, -1,  1,
        1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1, -1, -1,  1,  1, -1, -1,  1, -1,  1, -1, -1, -1,  1,
        1, -1, -1,  1,  1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,  1])

In [75]:
# Inspect the learned parameters: p.par[0] is the bias b, p.par[1:] are the feature weights.
p.par

array([ 0.002     , -0.00152814, -0.01162511,  0.00508387,  0.00490644,
        0.00126565, -0.00963153, -0.00250316, -0.00346728])

In [76]:
# Fraction of test samples whose predicted label matches y_test.
p.score(x_test,y_test)

0.8157894736842105

### Comparison with scikit-learn's Perceptron

In [64]:
from sklearn.linear_model import Perceptron
# Baseline model: scikit-learn's Perceptron, passing only random_state so that
# every other hyperparameter keeps its library default.
clf = Perceptron(random_state = 0)
clf.fit(x_train, y_train)

Perceptron()

In [65]:
# Score the scikit-learn baseline on the same test split used for p.score.
clf.score(x_test, y_test)

0.6513157894736842