## Exercise 1

*In this programming exercise we take the polarity dataset and build a perceptron using only numpy with only one layer to train the model and try to get the accuracy above 70%*

In [1]:
import numpy as np
import os
# ----------------------------------------------------------------------------------------------------------------------
#               5.1 DATASET READER
# ----------------------------------------------------------------------------------------------------------------------

rawdev = open("DATA/rt-polarity.dev.vecs").read()
rawtrain = open("DATA/rt-polarity.train.vecs").read()
rawtest = open("DATA/rt-polarity.train.vecs").read()



In [2]:
def extract_data(df):
    

    #extract labels from file
    
    df_parsed = df.replace('\t','\n').split('\n')
    df_label = df_parsed[1::3]
    labels = np.array(list(map(lambda x:1 if x == 'label=POS' else 0, df_label)),dtype=int)
    labels = labels.reshape(len(labels),1)
    
    #extract reviews as np.array from file
    
    review = df_parsed[2::3]
    review = np.array(review,dtype=str)
    
    review_num = {}
    
    for x in range(len(review)):
        review_num[x] = np.array(list(map(lambda x:x.replace("'", ""), review[x].split())),dtype=float,order='K')
    
    train = np.zeros((len(labels),100))
    
    for i in range(len(labels)):
        train[i]= review_num[i].flatten().reshape(1,100)
    
    #adding bias to the data and concatenating the features and bias 
    
    bias = np.ones((len(labels),1),dtype=float)
    x = np.concatenate((train,bias),axis=1)
    
    return x,labels

    




In [3]:
#extract features and labels from all 3 datasets

x_dev,labels_dev = extract_data(rawdev)
x_train,labels_train = extract_data(rawtrain)
x_test,labels_test = extract_data(rawtest)

In [6]:
# ----------------------------------------------------------------------------------------------------------------------
#               5.2 NUMPY IMPLEMENTATION
# ----------------------------------------------------------------------------------------------------------------------

#function for sigmoid activation function

def sigmoid(z):
    s = 1/(1+np.exp(-z ))
    return s

#function for derivative of sigmoid activation function

def sigmoid_derv(z):
    s = 1/(1+np.exp(-z))
    return s*(1-s)

#function for finding accuracy of a model.

def accuracy(x,w,actual):
    
    y_est = np.empty((len(x),1))
    
    #estimating labels based on the threshold of 0.5 on the output of act.function
    
    for i in range(len(actual)):
        z = np.dot(x[i].T,w)
        if sigmoid(z)<0.5:
            y_est[i] = 0
        else:
            y_est[i] = 1
        
    right=0
    
    #computing the accuracy of the outputs
    
    for i in range(len(actual)):
        if y_est[i]==actual[i]:
            right+=1
    accuracy = right/len(actual)*100
    return accuracy

#function for squared loss of the estimated labels

def squared_loss(x,w,actual):
    
    loss = 0
    for i in range(len(actual)):
        loss += (sigmoid(np.dot(x[i].T,w))-actual[i])**2
    return np.array(loss,dtype=int)


def perceptron_model(df,alpha,batch,epochs,y):
    
    batch_1 = batch-1
    w_sum = np.zeros((101,1),dtype=float)
    rng = np.random.RandomState(200)
    w = rng.normal(0,1,(101,1))
    for i in range(epochs):
        
        for j in range(df.shape[0]):
            z = np.dot(df[j].T,w)
            y_estim = sigmoid(z)
            w_sum += (y_estim-y[j])*sigmoid_derv(z)*df[j].reshape(101,1) #summation of weights of each mini-batch
            if j%batch==batch_1:        #conditon to update the weights only at the end of each batch
                w -= (alpha*w_sum)/batch
                w_sum = np.zeros((101,1),dtype=float) # clearing the accumulated weights after each epoch
            if j==df.shape[0]-1:                 #updating the weight at the end of each dataset
                w = w - (alpha*w_sum)/batch
    
    return w
                



In [8]:
# ----------------------------------------------------------------------------------------------------------------------
#               5.3 TRAINING
# ----------------------------------------------------------------------------------------------------------------------

weights = perceptron_model(df=x_train,alpha=0.01,batch=10,epochs=59,y=labels_train)

print("accuracy of dev set: {}".format(accuracy(x_dev,weights,labels_dev)))
print("Squared loss of dev set: {}".format(squared_loss(x_dev,weights,labels_dev)))


print("accuracy of test set: {}".format(accuracy(x_test,weights,labels_test)))
print("Squared loss of test set: {}".format(squared_loss(x_test,weights,labels_test)))


  s = 1/(1+np.exp(-z ))
  s = 1/(1+np.exp(-z))


accuracy of dev set: 66.79174484052533
Squared loss of dev set: [527]
accuracy of test set: 68.32797427652733
Squared loss of test set: [2333]


In [None]:
def perceptron_accu(df,alpha,batch,y,x_test,labels_test):
    epochs= 0
    batch_1 = batch-1
    w_sum = np.zeros((101,1),dtype=float)
    rng = np.random.RandomState(200)
    w = rng.normal(0,1,(101,1))
    
    while 1!=0:
        #print("epoch {} running".format(i))
        #rng.shuffle(df)
        epochs+=1
        for j in range(df.shape[0]):
            z = np.dot(df[j].T,w)
            y_estim = sigmoid(z)
            w_sum += (y_estim-y[j])*sigmoid_derv(z)*df[j].reshape(101,1) #summation of weights of each mini-batch
            if j%batch==batch_1:        #conditon to update the weights only at the end of each batch
                w -= (alpha*w_sum)/batch
                w_sum = np.zeros((101,1),dtype=float) # clearing the accumulated weights after each epoch
            if j==df.shape[0]-1:                 #updating the weight at the end of each dataset
                w = w - (alpha*w_sum)/batch
        
        if accuracy(x_dev,w,labels_dev)>=74:
            print("epoch : {} weights: {} accuracy: {}".format(epochs,w,accuracy(x_test,w,labels_test)))
            break
            
    return w