In [None]:
import math

# Additive-smoothing constant: HeatMap seeds every pixel count with this value
# and its example total with twice this value, so (for a positive value) no
# per-pixel probability is exactly 0 or 1.
# Tunable -- other values can be tried for different results if time allows.
SMOOTHING_FACTOR = 0.9

class Dataset:
    """Images and labels loaded from a pair of plain-text files.

    Attributes:
        images: list of images; each image is a list of rows and each row is
            the list of characters of one file line (the trailing newline
            character, when present, is kept as the last element).
        labels: list of raw label lines, one per line of the labels file,
            trailing newline included.
        images_by_label: empty dict, not populated by this class.
        priors: empty dict, not populated by this class.
    """

    def __init__(self, images_file, labels_file, lines_per_image = 28):
        """Read every complete image and every label line.

        Args:
            images_file: path of the text file holding the images.
            labels_file: path of the text file holding one label per line.
            lines_per_image: number of consecutive file lines forming one image.
        """
        self.images_by_label = {}
        self.priors = {}
        self.images = []
        self.labels = []

        with open(images_file) as image_stream:
            while True:
                rows = self._read_rows(image_stream, lines_per_image)
                if rows is None:
                    # End of file; an incomplete trailing image is discarded.
                    break
                self.images.append(rows)

        with open(labels_file) as label_stream:
            self.labels.extend(label_stream)

    @staticmethod
    def _read_rows(stream, row_count):
        """Read `row_count` lines as character lists; None if EOF comes first."""
        collected = []
        for _ in range(row_count):
            text = stream.readline()
            if text == "":
                return None
            collected.append(list(text))
        return collected

    def display(self, i):
        """Print image number `i` exactly as its characters were read."""
        print("".join("".join(row) for row in self.images[i]))
class Classifier:
    """Naive-Bayes scorer for a single label.

    Attributes:
        label: the label this classifier scores for.
        hMap: object exposing `mapSize`, `hMap_count` (2-D list of per-pixel
            foreground counts) and `totalExamples`.
        prior: prior probability of `label`.
    """

    def __init__(self, label, hMap, prior):
        self.label = label
        self.hMap = hMap
        self.prior = prior

    def evaluate_likelihood(self, image):
        """Return the log10 score of `image` under this label.

        The score is log10(prior) plus the sum of log10 of every per-pixel
        probability over the `mapSize` x `mapSize` grid, so it is never
        positive; among several classifiers the largest score is the best
        match.

        Args:
            image: 2-D sequence of single characters, indexed [row][column].

        Returns:
            The log10 score, or float("-inf") when the prior or any pixel
            probability is not positive (the log of zero).
        """
        if self.prior <= 0:
            # log10 would raise ValueError; a zero prior means zero likelihood.
            return float("-inf")

        total = math.log10(self.prior)
        for i in range(self.hMap.mapSize):
            for j in range(self.hMap.mapSize):
                partial_eval = self.single_probability(i, j, image[i][j])
                if partial_eval is None:
                    # A newline is not a pixel observation; it adds nothing.
                    continue
                if partial_eval <= 0:
                    print("bad eval, can't take log at spot: " + str(i) + ", " + str(j))
                    self.printImage(image)
                    # One impossible pixel makes the whole product zero.
                    return float("-inf")
                total += math.log10(partial_eval)
        return total

    def printImage(self, image):
        """Print `image` by concatenating all of its characters."""
        print("".join(map(lambda x: "".join(x), image)))

    def single_probability(self, x, y, testPixel):
        """Return the probability of observing `testPixel` at cell (x, y).

        Args:
            x: first index into `hMap.hMap_count`.
            y: second index into `hMap.hMap_count`.
            testPixel: the observed character; ' ' is background, a newline is
                not a pixel, anything else is foreground.

        Returns:
            count / totalExamples for a foreground pixel,
            (totalExamples - count) / totalExamples for a background pixel,
            or None for a newline character.
        """
        if testPixel == "\n":
            return None
        total = self.hMap.totalExamples
        foreground = self.hMap.hMap_count[x][y]
        if testPixel == ' ':
            # Number of training examples in which this pixel was background.
            return float((total - foreground) / total)
        return float(foreground / total)
            
            
class HeatMap:
    """Smoothed per-pixel foreground counts for one digit label.

    Attributes:
        hMap_count: mapSize x mapSize grid; each cell starts at the smoothing
            value and grows by 1 for every added image whose pixel there is
            not a space.
        label: the digit this heat map describes.
        mapSize: side length of the square grid.
        totalExamples: twice the smoothing value plus the number of images
            added so far.
        smoothing: the smoothing value this instance was built with.
    """

    def __init__(self, label, mapSize = 28, smoothing = None):
        """Create an empty heat map containing only the smoothing counts.

        Args:
            label: the digit this heat map describes.
            mapSize: side length of the square grid.
            smoothing: additive smoothing value; defaults to the module-level
                SMOOTHING_FACTOR at construction time.
        """
        if smoothing is None:
            smoothing = SMOOTHING_FACTOR
        # Remembered per instance so later calculations stay consistent even
        # if the module constant is changed after this map was built.
        self.smoothing = smoothing
        self.hMap_count = [[smoothing for _ in range(mapSize)] for _ in range(mapSize)]
        self.label = label
        self.mapSize = mapSize
        self.totalExamples = 2*smoothing

    def printHMap(self, smoothed = True):
        """Print a header line and a character rendering of the grid.

        Each cell prints as ' ' below the blank threshold (1 when `smoothed`
        is true, 3 otherwise), '*' below 80, and 'X' from 80 upwards.
        """
        print("Heat map for digit: " + str(self.label) + " from " + str(self.totalExamples) + " examples")
        blank_below = 1 if smoothed else 3
        for i in range(self.mapSize):
            toPrint = ""
            for j in range(self.mapSize):
                count = self.hMap_count[i][j]
                if count < blank_below:
                    toPrint += " "
                elif count < 80:
                    toPrint += "*"
                else:
                    toPrint += "X"
            print(toPrint)

    def addToHMap(self, image):
        """Count one more example, incrementing every non-space pixel's cell.

        Args:
            image: 2-D sequence of single characters, indexed [row][column],
                at least mapSize x mapSize.
        """
        self.totalExamples += 1
        for i in range(self.mapSize):
            for j in range(self.mapSize):
                if image[i][j] != ' ':
                    self.hMap_count[i][j] += 1

    def labelProbability(self, trainingSetSize = 5000):
        """Return the fraction of the training set that carries this label.

        Args:
            trainingSetSize: total number of training images over all labels.
        """
        # Remove the smoothing offset to recover the real example count.
        return (self.totalExamples - 2*self.smoothing) / trainingSetSize

    def goodHMap(self):
        """Return True when every cell count is strictly positive."""
        return all(
            self.hMap_count[i][j] > 0
            for i in range(self.mapSize)
            for j in range(self.mapSize)
        )

            
        

In [None]:
# Load the training images together with their digit labels.
dataset = Dataset(images_file="trainingimages", labels_file="traininglabels")

In [None]:
# One heat map per digit class 0-9, initially holding only smoothing counts.
digitHMaps = [HeatMap(digit) for digit in range(10)]

# Fold every training image into the heat map of its labelled digit.
for i, trainingImage in enumerate(dataset.images):
    currLabel = int(dataset.labels[i])
    digitHMaps[currLabel].addToHMap(trainingImage)
    


In [None]:
# Load the held-out test images together with their digit labels.
testData = Dataset(images_file="testimages", labels_file="testlabels")

In [None]:
# Build one classifier per digit. The prior is computed from the real number
# of training images instead of labelProbability's default of 5000, so the
# priors sum to 1 whatever the size of the training set.
trainingSetSize = len(dataset.images)
classifiers = [
    Classifier(digit, hMap, hMap.labelProbability(trainingSetSize))
    for digit, hMap in enumerate(digitHMaps)
]
    

In [None]:
# Sanity check: print the sum of all class priors.
priorTotal = sum(classifier.prior for classifier in classifiers)
print(priorTotal)
# Dump the raw (smoothed) pixel counts learned for digit 0.
print(classifiers[0].hMap.hMap_count)

In [None]:
# Score one test image against each of the ten digit classifiers.
image = testData.images[5]
for i in range(10):
    # Each score is a sum of log10 of probabilities, so negative values are
    # expected; the score closest to zero marks the most likely digit.
    score = classifiers[i].evaluate_likelihood(image)
    print(str(i) + ": chances are: " + str(score))

In [None]:
# Print test image 5 so the scores computed for it can be checked by eye.
testData.display(5)