In [53]:
import math
from decimal import *
import numpy as np

# Load the space-delimited training and test sets as 8-byte float arrays.
trainingData = np.genfromtxt('hw4btrain.txt', delimiter=' ', dtype="f8")
testData = np.genfromtxt('hw4btest.txt', delimiter=' ', dtype="f8")

# Every column of a row except the last is a feature; column `features`
# (the last one of a training row) is read as the label.
features = np.size(trainingData[0]) - 1
# Sorted distinct values found in the label column of the test set.
labels = np.unique(testData[:, features])

class Perceptron:
    """One-vs-rest perceptron for a single target label.

    Each data row is a sequence of feature values followed by the label in
    the column right after the features.  A training row is treated as a
    positive example (y = +1) when its label equals ``classval`` and as a
    negative example (y = -1) otherwise.

    Every ``classify*`` method returns ``classval`` when the sample is judged
    to belong to the target class and ``NOT_CLASS`` otherwise.

    Every ``computeError*`` method returns the fraction of rows on which the
    classifier's yes/no decision ("is this ``classval``?") disagrees with the
    row's label, so a correctly rejected sample is not counted as an error
    whatever its actual label is.

    Attributes:
        w: current weight vector (numpy array, one entry per feature).
        wlist: every weight vector visited by voted training, as lists.
        c: c[i] is the number of training rows wlist[i] survived.
        classval: the label this classifier detects.
        trainingData, testData: arrays loaded from the constructor paths.
    """

    # Label returned by the classify* methods for "not classval".
    NOT_CLASS = 10

    def __init__(self, trainingDataPath, testDataPath, classval):
        """Load both data files and start from an all-zero weight vector.

        Args:
            trainingDataPath: path of the space-delimited training file.
            testDataPath: path of the space-delimited test file.
            classval: label treated as the positive class.
        """
        self.classval = classval
        self.trainingData = np.genfromtxt(trainingDataPath, dtype="f8", delimiter=' ')
        self.testData = np.genfromtxt(testDataPath, dtype="f8", delimiter=' ')
        # The feature count comes from this instance's own training data, so
        # the weights must be created after the data has been loaded.
        self.w = np.zeros(self._featureCount())
        self.wlist = [list(self.w)]
        self.c = [1]

    def _featureCount(self):
        """Return the number of feature columns (row width minus the label)."""
        return self.trainingData[0].size - 1

    def _trainingPass(self, recordVotes):
        """Run one pass over the training rows, updating ``self.w`` on mistakes.

        When ``recordVotes`` is true, every new weight vector is appended to
        ``self.wlist`` with a count of 1, and every correctly handled row
        increments the count of the most recent weight vector.
        """
        numFeatures = self._featureCount()
        for row in self.trainingData:
            x = row[:numFeatures]
            y = 1 if row[numFeatures] == self.classval else -1
            # A score of exactly zero counts as a mistake, so the all-zero
            # starting vector is updated by the first row it sees.
            if y * np.dot(self.w, x) <= 0:
                self.w += x * y
                if recordVotes:
                    self.wlist.append(list(self.w))
                    self.c.append(1)
            elif recordVotes:
                self.c[-1] += 1

    def runPerceptron(self):
        """Run one plain perceptron pass and return the weight vector."""
        self._trainingPass(False)
        return self.w

    def runVoted(self):
        """Run one voted-perceptron pass, extending ``wlist`` and ``c``."""
        self._trainingPass(True)

    def runForIterations(self, t):
        """Reset ``w`` to zeros and run ``t`` plain perceptron passes."""
        self.w = np.zeros(self._featureCount())
        for _ in range(0, t):
            self.runPerceptron()

    def runForIterationsVoted(self, t):
        """Reset ``w``, ``wlist`` and ``c`` and run ``t`` voted passes."""
        self.w = np.zeros(self._featureCount())
        self.wlist = [list(self.w)]
        self.c = [1]
        for _ in range(0, t):
            self.runVoted()

    def classifyVector(self, w, vector):
        """Classify ``vector`` with the weight vector ``w``.

        Only the first ``len(w)`` entries of ``vector`` are used, so a row
        that still carries its label can be passed directly.

        Returns:
            ``classval`` if the score is strictly positive, else ``NOT_CLASS``.
        """
        score = np.dot(vector[:len(w)], w)
        return self.classval if score > 0 else self.NOT_CLASS

    def classifyVectorVoted(self, vector):
        """Classify ``vector`` by a count-weighted vote over ``wlist``.

        Each stored weight vector votes +1 if it scores the sample strictly
        positive and -1 otherwise; each vote is weighted by its count in
        ``c``.  A tie is resolved as ``NOT_CLASS``.
        """
        weights = np.asarray(self.wlist)
        scores = weights.dot(vector[:weights.shape[1]])
        votes = np.where(scores > 0, 1, -1)
        total = np.dot(votes, self.c)
        return self.classval if total > 0 else self.NOT_CLASS

    def classifyVectorAveraged(self, vector):
        """Classify ``vector`` with the count-weighted sum of ``wlist``."""
        weights = np.asarray(self.wlist)
        averaged = np.dot(self.c, weights)
        return self.classifyVector(averaged, vector)

    def _errorRate(self, data, classify):
        """Return the fraction of ``data`` rows that ``classify`` gets wrong.

        A row is wrong when "predicted as classval" differs from "labelled
        as classval".  Raises ZeroDivisionError for empty ``data``.
        """
        numFeatures = self._featureCount()
        mistakes = 0.0
        for row in data:
            predictedPositive = classify(row) == self.classval
            actuallyPositive = row[numFeatures] == self.classval
            if predictedPositive != actuallyPositive:
                mistakes += 1.0
        return mistakes / len(data)

    def computeError(self, data):
        """Error rate of the plain perceptron (current ``w``) on ``data``."""
        return self._errorRate(data, lambda row: self.classifyVector(self.w, row))

    def computeErrorVoted(self, data):
        """Error rate of the voted classifier on ``data``."""
        return self._errorRate(data, self.classifyVectorVoted)

    def computeErrorAveraged(self, data):
        """Error rate of the averaged classifier on ``data``."""
        return self._errorRate(data, self.classifyVectorAveraged)


In [16]:
# Report the voted-perceptron error after 1, 2 and 3 training passes.
# NOTE(review): `p` is not created by any cell visible above this one; it
# presumably comes from an earlier kernel session -- define it before running.
for i in range(1, 4):
    p.runForIterationsVoted(i)
    trainError = p.computeErrorVoted(trainingData)
    print("Error for training data over " + repr(i) + " iterations is " + repr(trainError))
    testError = p.computeErrorVoted(testData)
    print("Error for test data over " + repr(i) + " iterations is " + repr(testError))

Error for training data over 1 iterations is 0.013
Error for test data over 1 iterations is 0.012
Error for training data over 2 iterations is 0.006
Error for test data over 2 iterations is 0.01
Error for training data over 3 iterations is 0.003
Error for test data over 3 iterations is 0.008


In [24]:
# Report the averaged-perceptron error after 1, 2 and 3 voted training passes.
# NOTE(review): `p` is not created by any cell visible above this one; it
# presumably comes from an earlier kernel session -- define it before running.
for i in range(1, 4):
    p.runForIterationsVoted(i)
    trainError = p.computeErrorAveraged(trainingData)
    print("Error for training data over " + repr(i) + " iterations is " + repr(trainError))
    testError = p.computeErrorAveraged(testData)
    print("Error for test data over " + repr(i) + " iterations is " + repr(testError))

Error for training data over 1 iterations is 0.01
Error for test data over 1 iterations is 0.01
Error for training data over 2 iterations is 0.008
Error for test data over 2 iterations is 0.01
Error for training data over 3 iterations is 0.002
Error for test data over 3 iterations is 0.01


In [54]:
# One Perceptron per target label 0..9; each instance loads both data files.
classifiers = [
    Perceptron("hw4btrain.txt", "hw4btest.txt", i)
    for i in range(10)
]

In [55]:
# Train every classifier with a single plain perceptron pass
# (runForIterations resets the weight vector to zeros before training).
# After the loop, `p` stays bound to the last classifier in the list.
for p in classifiers:
    p.runForIterations(1)

In [62]:
# Confusion matrix of the one-vs-rest ensemble on the test set.
#   confMatrix[r][c]: fraction of test samples with true label c that were
#   predicted as r.  The extra last row (index numClasses) is "don't know".
#   ns[c]: number of test samples whose true label is c.
numClasses = len(classifiers)
dontKnow = numClasses
confMatrix = [[0.0] * numClasses for _ in range(numClasses + 1)]
ns = [0.0] * numClasses

for vector in testData:
    label = int(vector[features])
    ns[label] += 1.0
    # A classifier "claims" the sample when it returns its own label.
    claims = [i for i in range(numClasses)
              if classifiers[i].classifyVector(classifiers[i].w, vector) == i]
    # Predict a label only when exactly one classifier claims the sample;
    # zero claims or conflicting claims both count as "don't know".
    known = claims[0] if len(claims) == 1 else dontKnow
    confMatrix[known][label] += 1.0

# Turn the counts of every column into fractions of that label's samples.
for (col, count) in enumerate(ns):
    if count == 0:
        # No test sample carries this label: leave the column at zero
        # instead of dividing by zero.
        continue
    for row in confMatrix:
        row[col] /= count

for row in confMatrix:
    print(row)

[0.7777777777777778, 0.0, 0.0, 0.0, 0.0, 0.0, 0.009433962264150943, 0.0, 0.0, 0.0]
[0.0, 0.2755102040816326, 0.00980392156862745, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.5784313725490197, 0.018691588785046728, 0.01, 0.0, 0.009433962264150943, 0.0, 0.0, 0.00847457627118644]
[0.0, 0.0, 0.00980392156862745, 0.4205607476635514, 0.0, 0.010752688172043012, 0.0, 0.011904761904761904, 0.0, 0.0]
[0.0, 0.0, 0.00980392156862745, 0.0, 0.45, 0.010752688172043012, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.3118279569892473, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.010752688172043012, 0.4528301886792453, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.34523809523809523, 0.0, 0.0]
[0.020202020202020204, 0.01020408163265306, 0.058823529411764705, 0.06542056074766354, 0.03, 0.15053763440860216, 0.09433962264150944, 0.011904761904761904, 0.8387096774193549, 0.01694915254237288]
[0.0, 0.0, 0.0, 0.0, 0.06, 0.010752688172043012, 0.009433962264150943, 0.03571428571428571, 0.01075268