In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

"""
    Loads data from a csv file
"""
def load_data(filename):
    df = pd.read_csv(filename, header=None)
    return df

In [4]:
"""
    Preprocess the training data
"""
def preprocess_train_data(df):
    # Extract the predicted values from the dataset and replace '3' by '1' and '5' by '-1'
    actual=(np.where(df.iloc[:,0]==3,1,-1))

    # Extract the training data inputs from the dataset, append a bias term and assign it to X
    X = np.column_stack([np.ones((df.shape[0],1)).astype(float),df.iloc[:,1:]])
    return X, actual

In [10]:
"""
    Training linear classifier using online perceptron to fetch w
"""
def train_online_perceptron(dataset, actual, epoch):
    # Initialise the weight array with zeros
    w = np.zeros((1, dataset.shape[1]))
    
    # Initialise a weight vector to store the final weights at the end of each iteration
    W = np.zeros((epoch, dataset.shape[1]))
    
    # For each iteration in epoch
    for iter in range(epoch):
        print('iteration: ', iter+1)
        t=0 # Initializing T to zero ??
        S=[] # Initialise an empty list which will contain samples which were incorrectly classified.
        
        # Initialise loss to zero for each iteration
        total_error=0.0
        
        # Loop over each row in the dataset
        for index in range(len(actual)):
            x=dataset[index]
            y=actual[index]
            
            # Compute activation for the current example in the dataset
            activation = np.sign(np.matmul(w,np.transpose(x)))
            
            # If the prediction is wrong, update the weights
            if (np.dot(activation, y) <= 0):
                # update weights
                w = w + np.dot(y, x)
                t += 1 #??
            # Calculate the error for missclassification    
            total_error+= max(0, -1*np.dot(y, activation))
        
        W[iter]=w #final weights at the end of last iteration.
        
        print("Total error: ", total_error/t)
    return W # return the final estimated weights 

In [6]:
# Load training data
df = load_data('pa2_train.csv')
print(df.head())

   0    1    2    3    4    5    6    7    8    9   ...   775  776  777  778  \
0    5  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0   
1    5  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0   
2    3  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0   
3    5  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0   
4    3  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0   

   779  780  781  782  783  784  
0  0.0  0.0  0.0  0.0  0.0  0.0  
1  0.0  0.0  0.0  0.0  0.0  0.0  
2  0.0  0.0  0.0  0.0  0.0  0.0  
3  0.0  0.0  0.0  0.0  0.0  0.0  
4  0.0  0.0  0.0  0.0  0.0  0.0  

[5 rows x 785 columns]


In [7]:
X, Y = preprocess_train_data(df)
print(X)
print(Y)

[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[-1 -1  1 ...  1 -1  1]


In [11]:

Trained_W=train_online_perceptron(X, Y, 15)
print(Trained_W)
    

iteration:  1
Total error:  [0.99778271]
iteration:  2
Total error:  [1.]
iteration:  3
Total error:  [1.]
iteration:  4
Total error:  [1.]
iteration:  5
Total error:  [1.]
iteration:  6
Total error:  [1.]
iteration:  7
Total error:  [1.]
iteration:  8
Total error:  [1.]
iteration:  9
Total error:  [1.]
iteration:  10
Total error:  [1.]
iteration:  11
Total error:  [1.]
iteration:  12
Total error:  [1.]
iteration:  13
Total error:  [1.]
iteration:  14
Total error:  [1.]
iteration:  15
Total error:  [1.]
[[ -23.    0.    0. ...    0.    0.    0.]
 [ -39.    0.    0. ...    0.    0.    0.]
 [ -61.    0.    0. ...    0.    0.    0.]
 ...
 [-182.    0.    0. ...    0.    0.    0.]
 [-191.    0.    0. ...    0.    0.    0.]
 [-202.    0.    0. ...    0.    0.    0.]]
