In [1]:
import gzip
import numpy as np
from matplotlib import pyplot as plt


In [2]:
def get_data(inputs_file_path, labels_file_path, num_examples):
    digit_size = 28*28
    with open(inputs_file_path, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
        inputs = np.zeros((num_examples,digit_size), dtype = np.float32)
        bytestream.read(16)
        for i in range(num_examples):
            for j in range(digit_size):
                inputs[i,j] = np.frombuffer(bytestream.read(1), dtype = np.uint8)
        inputs = inputs/255
        inputs = inputs.astype(np.float32)
    
    with open(labels_file_path, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
        labels = np.zeros((num_examples), dtype = np.uint8)
        bytestream.read(8)
        for i in range(num_examples):
            labels[i] = np.frombuffer(bytestream.read(1), dtype = np.uint8)

    return(inputs,labels)

In [10]:
class Model:
    def __init__(self):
        self.input_size = 784 # Size of image vectors
        self.num_classes = 10 # Number of classes/possible labels
        self.batch_size = 100
        self.learning_rate = .5

        np.random.seed(42)  # same seed as used in lab
        self.W = np.random.normal(size = (self.num_classes, self.input_size))
        self.b = np.random.normal(size = (self.num_classes))

    def __call__(self, inputs):
        soft = np.zeros((len(inputs),self.num_classes))
        
        for i in range(len(inputs)):
            soft[i,] = np.matmul(self.W, inputs[i])
            soft[i,] = np.add(soft[i,], self.b)
            soft[i,] = np.exp(soft[i,] - np.max(soft[i,]))

        return soft

    def loss(self, probabilities, labels):
        CEL = 0
        
        lblLen = len(labels)
        vals = np.arange(lblLen)
        oneHotLabels = np.zeros((lblLen,self.num_classes))
        oneHotLabels[vals, labels] = 1
        
        
        temp = -np.sum(oneHotLabels*np.log(probabilities))# + np.multiply((1-labels), np.log(1-probabilities))
        temp = (temp/lblLen).astype(np.float32) #Change for batch later
        CEL += temp
        
        #CEL = -labels*np.log(probabillities)
        
        return CEL

    def backpropagation(self, inputs, probabilities, labels):
        probclass = np.argmax(probabilities, 1)
        
        BPw = np.zeros((self.num_classes, self.input_size))
        BPb = np.zeros((self.num_classes))
        
        for i in range(self.input_size-1):
            for j in range(self.num_classes):
                for c in range(len(labels)):
                    if probclass[i] == labels[j]:
                        BPw[:,i] += -self.learning_rate*inputs[c,i]*(probabilities[c,j] - 1)
                        BPb += -self.learning_rate*inputs[c,i]*(probabilities[c,j] - 1)
                    else:
                        BPw[:,i] += -self.learning_rate*inputs[c,i]*probabilities[c,j]
                        BPb += -self.learning_rate*inputs[c,i]*probabilities[c,j]
        
    
        return (BPw, BPb)

    def accuracy(self, probabilities, labels):
        probclass = np.argmax(probabilities, 1)
        
        correct = 0
        for i in range(len(probclass)):
            if probclass[i] == labels[i]:
                correct += 1
        
        print(correct)
        acc = correct/len(labels)
        print('accuracy')
        print(acc)
        
        return acc

    def gradient_descent(self, gradW, gradB):
        self.W = self.W - self.learning_rate * gradW
        self.b = self.b - self.learning_rate * gradB
        
        pass

    def train(model, train_inputs, train_labels):
        print('call')
        probs = model(train_inputs)
        print('loss')
        CEL = model.loss(probs, train_labels)
        print('backprop')
        (BPw, BPb) = model.backpropagation(train_inputs, probs, train_labels)
        print(BPw.shape)
        print('accuracy')
        ACC = model.accuracy(probs, train_labels)
        print('gradient descent')
        model.gradient_descent(BPw, BPb)
    
        visualize_loss(CEL)
        
        pass

    def test(model, test_inputs, test_labels):
        probs = model(test_inputs)
        ACC = model.accuracy(probs, test_labels)
        
        return ACC

    def visualize_loss(losses):
        x = np.arange(1, len(losses)+1)
        plt.xlabel('i\'th Batch')
        plt.ylabel('Loss Value')
        plt.title('Loss per Batch')
        plt.plot(x, losses)
        plt.show()

In [None]:
def main():
    print('loading data')
    (train_inputs,train_labels) = get_data('MNIST_data/train-images-idx3-ubyte.gz', 
                                           'MNIST_data/train-labels-idx1-ubyte.gz',60000)
    (test_inputs,test_labels) = get_data('MNIST_data/t10k-images-idx3-ubyte.gz', 
                                         'MNIST_data/t10k-labels-idx1-ubyte.gz',10000)

    print('creating model')
    NN = Model()
    bat_inputs = np.zeros(NN.input_size*NN.batch_size)
    bat_labels = np.zeros(NN.batch_size)

    print('training')
    NN.train(train_inputs, train_labels)

    print('testing')
    NN.test(test_inputs, test_labels)

    print('done')
    
    pass

if __name__ == '__main__':
    main()

loading data


  inputs[i,j] = np.frombuffer(bytestream.read(1), dtype = np.uint8)
  labels[i] = np.frombuffer(bytestream.read(1), dtype = np.uint8)


creating model
training
call
loss
backprop
