# Pocket Algorithm

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

In [2]:
#Read the dataset 'classification',remove the last column:
dataset = pd.read_table('classification.txt',sep=',',header=None,names=['x1','x2','x3','na','label'])
dataset = dataset.drop(['na'],axis=1)

#Add the first column-X0:
x0 = []
for i in range(len(dataset)):
    x0.append(1)

first_col = pd.DataFrame(columns=['x0'],data=x0)

dataset = first_col.merge(dataset,how='inner',left_index=True, right_index=True)

In [3]:
#Check our dataset:
dataset

Unnamed: 0,x0,x1,x2,x3,label
0,1,0.750072,0.977408,0.885658,1
1,1,0.877914,0.019251,0.506711,-1
2,1,0.777325,0.994066,0.822244,1
3,1,0.181158,0.460749,0.525477,1
4,1,0.114564,0.067555,0.128920,1
...,...,...,...,...,...
1995,1,0.302021,0.049354,0.973333,-1
1996,1,0.196709,0.598557,0.252530,-1
1997,1,0.515506,0.153544,0.012755,1
1998,1,0.228226,0.971554,0.183059,1


In [32]:
#Divide our dataset into X(coordinates of points) and Y(label):
X = dataset.drop(['label'],axis=1)
Y = dataset.loc[:,'label']  
#print(X)
#print(Y)

In [33]:
#Randomize the initial weights:
weights = (np.random.random(4))  
print('The initial weights are',weights)

The initial weights are [0.78998745 0.95451615 0.35463112 0.86410985]


In [34]:
#Define a function for pocket algorithm error rate, using it as a standard to update weights:
def error_rate (data,w,x,y):
    n_error = 0
    for i in range(len(data)):
        if np.dot(w,(np.array(x.iloc[i]))) >= 0 and y.iloc[i] == -1:
            n_error += 1
        elif np.dot(w,(np.array(x.iloc[i]))) < 0 and y.iloc[i] == 1:
            n_error += 1
        else:
            pass
    return n_error/(len(data))

In [39]:
#Define the pocket PLA function: 
def pocket_algorithm (data,x,y,w,n_iteration,alpha):
    n = 0
    error = []
    while n < n_iteration:
        n+=1        
        for i in range(len(data)):
            thershold = np.dot(w,(np.array(x.iloc[i])))
            old_error = error_rate(data,w,x,y)
            
            if thershold < 0 and y.iloc[i] == 1:  #Pick the violated data points 
                new_weights = w + alpha * np.array(x.iloc[i])
                new_error = error_rate(data,new_weights,x,y)
                
                if new_error < old_error:
                    w = new_weights 
                    error.append(new_error*2000)  #Update the weight values 
                    break
                else:
                    continue
                    
            elif thershold >= 0 and y.iloc[i] == -1:  #Pick the violated data points 
                new_weights = w - alpha * np.array(x.iloc[i])
                new_error = error_rate(data,new_weights,x,y)
   
                if new_error < old_error:
                    w = new_weights               #Update the weight values
                    error.append(new_error*2000)
                    break
                else:
                    continue
            else:
                continue 
    
    print('The weights after Pocket PLA iterates',n_iteration,'times are',w)    
    return w

In [40]:
#Run the Pocket algorithm 7000 times:
#Set alpha (learning rate) = 0.01

w7000=pocket_algorithm (dataset,X,Y,weights,7000,0.01)

The weights after Pocket PLA iterates 7000 times are [   0.78998745 -103.55879573   66.57939575    3.67893239]


In [41]:
e7000=error_rate(dataset,w7000,X,Y)
print('The number of misclassified points after model iterates 7000 times is',(e7000*2000))

The number of misclassified points after model iterates 7000 times is 971.0


In [42]:
#Calculate the model accuracy rate, which equals to 1-error rate 
accuracy = 1-e7000
print ('The accuracy rate of Pocket PLA after model iterates 7000 times is',accuracy)

The accuracy rate of Pocket PLA after model iterates 7000 times is 0.5145
