In [36]:
import math

#this number can be played with for new results if we have time
SMOOTHING_FACTOR = 0.9

class Dataset:
    def __init__(self, images_file, labels_file, lines_per_image = 28):
        self.images = []
        self.labels = []
        self.images_by_label = {}
        self.priors = {}
        with open(images_file) as file:
            EOF = False
            while not EOF:
                image = []
                for i in range(lines_per_image):
                    line = file.readline()
                    if not line:
                        EOF = True
                        break
                    image.append(list(line))
                if EOF:
                    break
                self.images.append(image)
        with open(labels_file) as file:
            for line in file:
                self.labels.append(line)
    def display(self, i):
        print("".join(map(lambda x: "".join(x),self.images[i])))
class Classifier:
    def __init__(self, label, hMap, prior):
        self.label = label
        self.hMap = hMap
        self.prior = prior
    
    #where should I compare the smoothed heatmap to the input image??
    def evaluate_likelihood(self, image):
        total = math.log10(self.prior)
        for i in range(self.hMap.mapSize):
            for j in range(self.hMap.mapSize):
                partial_eval = self.single_probability(i, j, image[i][j])
                if(partial_eval <= 0):
                    print("bad eval, can't take log at spot: " + str(i) + ", " + str(j))
                    self.printImage(image)
                else:
                    y=0
                    #print("valid logarithm")
                total += math.log10(partial_eval)
        return total
    
    def printImage(self, image):
        print("".join(map(lambda x: "".join(x),image)))
    
    def single_probability(self, x, y, testPixel):
        if(testPixel == ' '):
            #testVal = 0
            #number of times a pixel is 0 from all the training examples
            instances = self.hMap.totalExamples - self.hMap.hMap_count[x][y]
            return float(instances / self.hMap.totalExamples)
        elif testPixel != "\n":
            #testVal = 1
            instances = self.hMap.hMap_count[x][y]
            return float(instances / self.hMap.totalExamples)
            
            
class HeatMap:
    def __init__(self, label, mapSize = 28):
        #initially blank, smoothing will be added later
        self.hMap_count = [[SMOOTHING_FACTOR for i in range(mapSize)] for j in range(mapSize)]
        self.label = label
        self.mapSize = mapSize
        self.totalExamples = 2*SMOOTHING_FACTOR
                    
    def printHMap(self, smoothed = True):
        print("Heat map for digit: " + str(self.label) + " from " + str(self.totalExamples) + " examples")
        for i in range(self.mapSize):
            if smoothed:
                toPrint = ""
                for j in range(self.mapSize):
                    if self.hMap_count[i][j] < 1:
                        toPrint += " "
                    elif self.hMap_count[i][j] < 80:
                        toPrint += "*"
                    else:
                        toPrint += "X"
                print(toPrint)
                
            else:
                toPrint = ""
                for j in range(self.mapSize):
                    if self.hMap_count[i][j] < 3:
                        toPrint += " "
                    elif self.hMap_count[i][j] < 80:
                        toPrint += "*"
                    else:
                        toPrint += "X"
                print(toPrint)

    def addToHMap(self, image):
        self.totalExamples += 1
        for i in range(self.mapSize):
            for j in range(self.mapSize):
                if(image[i][j] != ' '):
                    self.hMap_count[i][j] += 1
                
    def labelProbability(self, trainingSetSize = 5000):
        return (self.totalExamples-2*SMOOTHING_FACTOR) / trainingSetSize
    
    def goodHMap(self):
        for i in range(self.mapSize):
            for j in range(self.mapSize):
                if(self.hMap_smoothed[i][j] <= 0):
                    return False
        
        return True

            
        

In [37]:
dataset = Dataset("trainingimages","traininglabels")

In [38]:
digitHMaps = [None]*10
for i in range(0,10):
    digitHMaps[i] = HeatMap(i)

for i  in range(len(dataset.images)):
    currLabel = int(dataset.labels[i])
    digitHMaps[currLabel].addToHMap(dataset.images[i])  
    


In [39]:
testData = Dataset("testimages","testlabels")

In [40]:
classifiers = [None]*10
for i in range(0,10):
    classifiers[i] = Classifier(i, digitHMaps[i], digitHMaps[i].labelProbability())
    

In [41]:
print(sum([classifier.prior for classifier in classifiers] ))
print(classifiers[0].hMap.hMap_count)

0.9999999999999999
[[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 1.9, 4.9, 7.9, 11.9, 14.9, 15.9, 14.9, 15.9, 12.9, 6.9, 4.9, 1.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 1.9, 4.9, 11.9, 24.9, 58.9, 100.9, 137.9, 177.9, 202.9, 197.9, 201.9, 171.9, 132.9, 84.9, 53.9, 30.9, 11.9, 2.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 1.9, 3.9, 13.9, 39.9, 88.9, 130.9, 195.9, 268.9, 325.9, 364.9, 372.9, 361.9, 306.9, 249.9, 171.9, 119.9, 61.9, 26.9, 4.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 2.9, 2.9, 3.9, 11.9, 33.9, 90.9, 151.9, 209.9, 281.

In [42]:
image = testData.images[5]
for i in range(10):
    #all these negative rn, idk what's wrong
    print(str(i) + ": chances are: " + str(classifiers[i].evaluate_likelihood(image)))

0: chances are: -153.37358294784764
1: chances are: -108.18750607047035
2: chances are: -108.93774228409418
3: chances are: -97.06616648972424
4: chances are: -73.81772446690626
5: chances are: -87.8020148912335
6: chances are: -109.41046350990337
7: chances are: -77.43755248612925
8: chances are: -82.32387802753301
9: chances are: -54.492572632569164


In [46]:
predictions = []
for image in testData.images:
    chances = [0]*10
    for i in range(10):
        chances[i] = classifiers[i].evaluate_likelihood(image)
    label = chances.index(max(chances))
    predictions.append(label)


In [69]:
wrongs = [0]*10
confusion_matrix = [[0 for i in range(10)] for j in range(10)]
for i in range(len(predictions)):
    if predictions[i] != int(testData.labels[i]):
        wrongs[predictions[i]] +=1
        confusion_matrix[int(testData.labels[i])][predictions[i]] +=1


for i in range(10):
    print(str(i) + " label success rate: " + str(100-(100*(wrongs[i]/(digitHMaps[i].totalExamples-2*SMOOTHING_FACTOR)))) + "%")

print("confusion matrix")
for i in range(10):
    print(str(confusion_matrix[i]))


0 label success rate: 98.53862212943632%
1 label success rate: 96.0923623445826%
2 label success rate: 97.1311475409836%
3 label success rate: 93.30628803245436%
4 label success rate: 95.70093457943925%
5 label success rate: 94.70046082949308%
6 label success rate: 96.8063872255489%
7 label success rate: 97.81818181818181%
8 label success rate: 95.45454545454545%
9 label success rate: 88.28282828282828%
confusion matrix
[0, 0, 1, 0, 1, 5, 3, 0, 4, 0]
[0, 0, 1, 0, 0, 2, 1, 0, 0, 0]
[1, 3, 0, 4, 1, 0, 6, 1, 5, 2]
[0, 2, 0, 0, 0, 3, 2, 6, 2, 6]
[0, 1, 0, 0, 0, 0, 3, 1, 2, 18]
[2, 2, 1, 12, 3, 0, 1, 1, 2, 6]
[1, 6, 4, 0, 4, 5, 0, 0, 2, 0]
[0, 6, 3, 0, 3, 0, 0, 0, 3, 14]
[2, 1, 3, 14, 2, 6, 0, 1, 0, 12]
[1, 1, 1, 3, 9, 2, 0, 2, 1, 0]
