In [3]:
import math
from collections import defaultdict
import random

In [68]:
class Dataset:
    def __init__(self, images_file, labels_file, lines_per_image = 28):
        self.images = []
        self.labels = []
        self.counts_by_label = defaultdict(int)
        self.priors = {}
        with open(images_file) as file:
            EOF = False
            while not EOF:
                image = []
                for i in range(lines_per_image):
                    line = file.readline()
                    if not line:
                        EOF = True
                        break
                    image.append(list(line))
                if EOF:
                    break
                self.images.append(image)
        with open(labels_file) as file:
            for i,label in enumerate(file):
                label = int(label)
                self.labels.append(label)
                self.counts_by_label[label]+=1
    def display(self, i):
        print("".join(map(lambda x: "".join(x),self.images[i])))
    def __len__(self):
        return len(self.labels)
    
    def shuffleData(self):
        order = list(zip(self.labels, self.images))
        random.shuffle(order)
        tempL, tempI = zip(*order) #zip turns them into giant tuples, want in list form
        self.labels = list(tempL)
        self.images = list(tempI)
    
    
class Perceptron:
    def __init__(self, label):
        self.label = label                                
        self.bias = 0.1 #random.random() #commented because idk which one to ultimately use
        self.weightVector = [[self.bias for i in range(28)] for j in range(28)]
        self.totalCount = 0
        
        
    def display(self):
        print('\n'.join(str(self.weightVector[i]) for i in range(28)))
        
    def trainVectorOnCorrect(self, image):
        self.totalCount += 1 
        for i in range(28):
            for j in range(28):
                if(image[i][j] != ' ' and image[i][j] != '\n'):
                    self.weightVector[i][j] += 2 #not sure what exact values should be
    def trainVectorOnIncorrect(self, image):
        self.totalCount -= 1
        for i in range(28):
            for j in range(28):
                if(image[i][j] != ' ' and image[i][j] != '\n'):
                     self.weightVector[i][j] -=1
                        
    def imageEvaluation(self, image):
        likelihood = 0
        for i in range(28):
            for j in range(28):
                if(image[i][j] == ' '):
                    likelihood -= self.weightVector[i][j]
                elif image[i][j] != '\n':
                    likelihood += self.weightVector[i][j]
        return likelihood

In [69]:
trainingData = Dataset("trainingimages", "traininglabels")
perceptrons = [None]*10
for i in range(10):
    perceptrons[i] = Perceptron(i)

In [71]:
for i in range(len(trainingData.images)):
    currDigit = trainingData.labels[i]
    currPercept = perceptrons[currDigit]
    currPercept.trainVectorOnCorrect(trainingData.images[i])

In [72]:
testData = Dataset("testimages", "testlabels")
confusion_matrix_count = [[0 for i in range(10)] for j in range(10)]

for i in range(len(testData.images)):
    currImage = testData.images[i]
    chances = [perceptrons[j].imageEvaluation(currImage) for j in range(10)]
    bestGuess = chances.index(max(chances))
    actualLabel = testData.labels[i]
    if bestGuess == actualLabel :
        perceptrons[actualLabel].trainVectorOnCorrect(currImage)
        confusion_matrix_count[bestGuess][bestGuess] +=1
    else :
        perceptrons[actualLabel].trainVectorOnIncorrect(currImage)
        confusion_matrix_count[actualLabel][bestGuess] +=1

for i in range(10):
    print(confusion_matrix_count[i])

[70, 1, 0, 1, 0, 1, 13, 0, 0, 4]
[0, 108, 0, 0, 0, 0, 0, 0, 0, 0]
[3, 59, 22, 2, 1, 1, 10, 3, 1, 1]
[0, 38, 0, 53, 0, 0, 1, 6, 0, 2]
[0, 17, 0, 0, 55, 0, 4, 0, 0, 31]
[4, 22, 0, 25, 3, 14, 5, 3, 3, 13]
[0, 27, 0, 0, 6, 0, 58, 0, 0, 0]
[0, 32, 0, 0, 1, 0, 0, 62, 0, 11]
[2, 59, 0, 6, 1, 0, 4, 2, 13, 16]
[1, 13, 0, 1, 5, 0, 0, 0, 0, 80]
