In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import csv
from matplotlib.animation import FuncAnimation
from celluloid import Camera
import random

In [3]:
NO_ANSWER = 9999
TRAIN_DATA_LOC = "C:\\Users\\17Z990\\Downloads\\Repos\\MachineLearningAttempts\\TrainingData\\mnist_train.csv"
TEST_DATA_LOC = "C:\\Users\\17Z990\\Downloads\\Repos\\MachineLearningAttempts\\TrainingData\\mnist_test.csv"
VALID_DATA_LOC = "C:\\Users\\17Z990\\Downloads\\Repos\\MachineLearningAttempts\\TrainingData\\mnist_valid.csv"
OG_TRAIN_DATA_LOC = "C:\\Users\\17Z990\\Downloads\\Repos\\MachineLearningAttempts\\TrainingData\\OG_mnist_train.csv"
LAYERS = [784,16,10]

I got the original csv version of the MNIST database here:
https://pjreddie.com/projects/mnist-in-csv/
and then I seperated the training portion into a validation portion and a new training portion using this code below

In [23]:
trainData = openCSV(OG_TRAIN_DATA_LOC)
with open(VALID_DATA_LOC,'w', newline = '') as validData:
    validWriter = csv.writer(validData)
    for i in range(10000):
        validWriter.writerow(trainData[i])
with open(TRAIN_DATA_LOC,'w',newline = '') as newData:
    newWriter = csv.writer(newData)
    for i in range(50000):
        newWriter.writerow(trainData[i + 10000])
        

This is where all the Data is initially handled and all the functions pertaining to it. The data was seperated for use into 3 different csv files for each type of data. train_mnist.csv is the training data containing 50,000 examples, test_mnist.csv is the testing data containing 10,000 examples, and mnist_valid.csv is the validation data used to derive hyper-parameters and it contains 10,000 examples. 

In [12]:
def openCSV(dataLocation):
    file = open(dataLocation,'r')
    dataReader = csv.reader(file)
    file = []
    for row in dataReader:
        file.append(row)
    return file

In [5]:
def getMNISTData(dataLocation):
    mnistData = []
    with open(dataLocation,'r') as data:
        dataReader = csv.reader(data)
        for row in dataReader:
            ans = row.pop(0)
            img = row
            
            mnistData.append((img,ans))
    return mnistData

In [6]:
#Takes a list/array which is 785 or 784 ints long depending on whether an answer is present
#Takes a boolean stating whether answers are present are at the beginning (assumes true)
def showData(numArray, answerPresent=True):
    if type(numArray) != list and type(numArray) != np.ndarray:
        raise ValueError("The numArray parameter is the wrong value. Try again with either a list or array")
    if type(answerPresent) != bool:
        raise ValueError("The answerPresent parameter is the wrong value. Try again with a boolean")
    if answerPresent:
        if len(numArray) != 785:
            raise IndexError("Your array is bigger than allowed. Try checking the answerPresent parameter")
    if not answerPresent:
        if len(numArray) != 784:
            raise IndexError("Your array is bigger than allowed. Try checking the answerPresent parameter")
    
    if answerPresent:
        ans = numArray[0]
        imgArray = numArray[1:]
    else:
        imgArray = numArray
        ans = NO_ANSWER
        
    for i in range(len(imgArray)):
        imgArray[i] = float(imgArray[i])
        showArray = np.array(imgArray).reshape(28,28)
    plt.imshow(showArray,'gray')
    plt.show()
    return ans

These are the supporting functions for the neural net class that are universal enough to exist outside it. In addition is a function that generates noise to possibly feed into the neural net.

In [7]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

In [8]:
def dervSigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [9]:
def genRandData():
    randData = np.random.choice(256,784) #Makes a random array with a length of 784 with numbers between 0 and 255
    randData = np.concatenate(([NO_ANSWER],randData))
    return randData

This is the class for the neural net that will hopefully make the network simple to use. It is the bare bones approach to machine learning but it will be improved, expanded and duplicated in the future.

In [10]:
class NeuralNet(object):
    def __init__(self,layers):
        self.numLayers = len(layers)
        #Creates 1 bias for every neuron in the layers after input
        self.biases = [np.random.randn(b,1) for b in layers[1:]]
        #Creates weights for every other weight connected to it
        self.weights = [np.random.randn(y,x) for x,y in zip(layers[:-1],layers[1:])]
    
    def costFunc(self,netAns,targetAns):
        x = np.subtract(targetAns,netAns)
        topArray = np.square(x)
        cost = np.divide(topArray,2)
        return cost
    
    def dervCostFunc(self,netAns,targetAns):
        return(netAns - targetAns)

    def feedForward(self,a):
        for b,w in zip(self.biases,self.weights):
            a = sigmoid(np.dot(w,a) + b)
        return a
    
    def backProp(self, x, y):
        biasesDerv = [np.zeros(b.shape) for b in self.biases]
        weightsDerv = [np.zeros(w.shape) for w in self.weights]
        a = np.array(x).astype('float64')
        activations = [a]
        zList = [] #List storing the z's 
        #Basically doing feedforward but noting the z's 
        #Could be combined with feedforward function in future
        for w,b in zip(self.weights,self.biases):
            z = np.dot(w,a) + b
            zList.append(z)
            a = sigmoid(z)
            activations.append(a)
            
        #Calculating the error for the first layer

        error = self.dervCostFunc(activations[-1],float(y)) * dervSigmoid(zList[-1])
        biasesDerv[-1] = error 
        weightsDerv[-1] = np.dot(error,activations[-2].transpose())
        
        #Doing the backpropogation stuff
        for i in range(2,self.numLayers):
            z = zList[-i]
            w = self.weights[-i + 1].transpose()
            error = np.dot(w,error) * dervSigmoid(z)
            biasesDerv[-i] = error
            weightsDerv[-i] = np.dot(error,activations[-i - 1].transpose())
        
        return(biasesDerv,weightsDerv)
    
    #Runs the gradient descent algorithm for a small part of the data called an batch which makes up 1 epoch
    def runBatch(self,epoch,learnRate):
        deltaB = [np.zeros(b.shape) for b in self.biases]
        deltaW = [np.zeros(w.shape) for w in self.weights]
        for x,y in epoch:
            singDeltB,singDeltW = self.backProp(x,y)
            deltaB = [db + sdb for db,sdb in zip(deltaB,singDeltB)]
            deltaW = [dw + sdw for dw,sdw in zip(deltaW,singDeltW)]
        self.weights = [w - ((learnRate * len(epoch)) * dw) for w,dw in zip(self.weights,deltaW)]
        self.biases = [b - ((learnRate * len(epoch)) * db) for b,db in zip(self.biases,deltaB)]
        
        
    def testNet(self,testData):
        testResults = [(np.argmax(self.feedForward(x)),y) for x,y in testData]
        return sum(int(x == y) for x,y in testResults)
        
    
    def stocGradientDescent(self,trainingData,epochs,batchSize,learnRate,testData = None):
        if testData:
            numTest = len(testData)
        numTrain = len(trainingData)
        
        for ex in range(epochs):
            random.shuffle(trainingData)
            batches = [trainingData[k:k + batchSize] for k in range(0,numTrain,batchSize)]
            for batch in batches:
                self.runBatch(batch,learnRate)
            if testData:
                print("Epoch {0}: {1} / {2}".format(i,self.testNet(),numTest))
            else:
                print("Epoch {0} complete").format(i)
            
        

In [9]:
trainData = getMNISTData(TRAIN_DATA_LOC)

In [10]:
testData = getMNISTData(TEST_DATA_LOC)

In [30]:
net = NeuralNet(LAYERS)

In [31]:
net.stocGradientDescent(trainData, 30, 10, 3.0, testData=testData)

ValueError: shapes (16,16) and (784,) not aligned: 16 (dim 1) != 784 (dim 0)