# PA 3: Multiclass Classification

In [1]:
import pandas as pd
import numpy as np
import random as rand

In [2]:
# Load in Datasets
training_data = pd.read_csv('./data/pa3train.txt', sep=" ", header=None)
label_index = training_data.shape[1] - 1
training_data_q1 = training_data.loc[training_data[label_index] <= 2]

# Map all 2 and 1 values to 1, -1
for i, row in training_data_q1.iterrows(): 
    if row[label_index] == 1: 
        row[label_index] = -1
    else: 
        row[label_index] = 1

In [3]:
def calc_error(Y_pred, Y_label): 
    # Calculate Error Rate for Predicted Labels
    error = [0 for x,y in zip(Y_pred,Y_label) if x != y]
    error_rate = len(error)/len(Y_pred)
    return error_rate

In [4]:
class perceptron:
    
    # Constructor
    def __init__(self):
        self.w = np.empty((0,0))
        self.w_list = []
        self.c_list = []
    
    # Make Predictions
    def predict(self, test_data, model):
        predictions = []
        # Error Checking
        if model not in ['normal','voted','averaged']:
            print("ERROR")
            return []
        for i,row in test_data.iterrows():
            sample_point = row[:test_data.shape[1]-1]
            if model == 'normal': 
                prediction = np.sign(np.dot(self.w, sample_point))
                if prediction == 0: 
                    prediction = rand.choice([1,-1])
                predictions.append(prediction)
            elif model == 'voted': 
                prediction = np.sign(sum(c*(np.sign(np.dot(w,sample_point))) for c, w in zip(self.c_list, self.w_list)))
                if prediction == 0: 
                    prediction = rand.choice([1,-1])
                predictions.append(prediction)
            elif model == 'averaged': 
                prediction = np.sign(np.dot((sum(c*w for c, w in zip(self.c_list, self.w_list))),sample_point))
                if prediction == 0: 
                    prediction = rand.choice([1,-1])
                predictions.append(prediction)
        return predictions
    
    # Train Classifier
    def train(self, data, num_passes): 
        w_list = []
        c_list = []
        c = 0
        w = np.zeros((data.shape[1]-1,))
        for p in range(num_passes): 
            for i, row in data.iterrows(): 
                X = row[:data.shape[1]-1]
                Y = row[data.shape[1]-1]
                if Y*(np.dot(w,np.transpose(X))) <= 0: 
                    # Adjust decision boundary, otherwise keep it 
                    w = np.add(w,Y*X)
                    w_list.append(w)
                    c_list.append(c)
                    c = 1
                else: 
                    c += 1
        c_list.append(c)
        self.w_list = w_list
        self.w = w_list[-1]  
        self.c_list = c_list

# Question 1

In [5]:
clf1 = perceptron()
clf1.train(training_data_q1, 2)
y_pred_normal = clf1.predict(training_data_q1, 'normal')
y_pred_avg = clf1.predict(training_data_q1, 'averaged')
y_pred_voted = clf1.predict(training_data_q1, 'voted')

In [6]:
y_true = training_data_q1[label_index]
print("ERROR RATES FOR TWO PASSES")
print("Error Rate Normal : ",calc_error(y_pred_normal, y_true))
print("Error Rate Voted: ",calc_error(y_pred_voted, y_true))
print("Error Rate Average: ",calc_error(y_pred_avg, y_true))

ERROR RATES FOR TWO PASSES
Error Rate Normal :  0.03944954128440367
Error Rate Voted:  0.03761467889908257
Error Rate Average:  0.05412844036697248


In [7]:
clf2 = perceptron()
clf2.train(training_data_q1, 3)
y_pred_normal = clf2.predict(training_data_q1, 'normal')
y_pred_avg = clf2.predict(training_data_q1, 'averaged')
y_pred_voted = clf2.predict(training_data_q1, 'voted')

In [8]:
y_true = training_data_q1[label_index]
print("ERROR RATES FOR THREE PASSES")
print("Error Rate Normal: ",calc_error(y_pred_normal, y_true))
print("Error Rate Voted: ",calc_error(y_pred_voted, y_true))
print("Error Rate Average: ",calc_error(y_pred_avg, y_true))

ERROR RATES FOR THREE PASSES
Error Rate Normal:  0.02110091743119266
Error Rate Voted:  0.031192660550458717
Error Rate Average:  0.03669724770642202


In [16]:
w_avg = (sum(c*w for c, w in zip(clf2.c_list, clf2.w_list)))
sorted_w = np.argsort(w_avg)
print("Top (Strong Positive Predictors): \n", sorted_w[-3:])
print("Bottom (Strong Negative Predictors): \n", sorted_w[:3])

Top (Strong Positive Predictors): 
 816    393
817    469
818     78
dtype: int64
Bottom (Strong Negative Predictors): 
 0    438
1    466
2    203
dtype: int64


In [17]:
clf = perceptron()
clf.train(training_data_q1, 4)
y_pred_normal = clf.predict(training_data_q1, 'normal')
y_pred_avg = clf.predict(training_data_q1, 'averaged')
y_pred_voted = clf.predict(training_data_q1, 'voted')

In [18]:
y_true = training_data_q1[label_index]
print("ERROR RATES FOR FOUR PASSES")
print("Error Rate Normal: ",calc_error(y_pred_normal, y_true))
print("Error Rate Voted: ",calc_error(y_pred_voted, y_true))
print("Error Rate Average: ",calc_error(y_pred_avg, y_true))

ERROR RATES FOR FOUR PASSES
Error Rate Normal:  0.01926605504587156
Error Rate Voted:  0.022018348623853212
Error Rate Average:  0.031192660550458717


# Question 3: