# Building a Neural Network from Scratch

In [1]:
import math
import csv
import random 
import time
import numpy
import gzip
import os
import matplotlib.pyplot as plt

In [2]:
#### Miscellaneous functions
def Sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+numpy.exp(-z))

def SigmoidPrime(z):
    """Derivative of the sigmoid function."""
    return Sigmoid(z)*(1-Sigmoid(z))

In [3]:
class Network(object):

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        
        #Initialize Weights & Biases
        self.biases = [numpy.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [numpy.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = Sigmoid(numpy.dot(w, a)+b)
        return a

    def Train(self, trainingData, epoch, batchSize, learningRate, testingData):

        testDataSize = len(testingData)
        trainDataSize = len(trainingData)
        for i in range(epoch):

            random.shuffle(trainingData)
            nabla_b = [numpy.zeros(b.shape) for b in self.biases]
            nabla_w = [numpy.zeros(w.shape) for w in self.weights]

            for j in range(0, trainDataSize, batchSize):

                nabla_b = [numpy.zeros(b.shape) for b in self.biases]
                nabla_w = [numpy.zeros(w.shape) for w in self.weights]
                miniBatchSize = 0
                
                for x, y in trainingData[j:j+batchSize]:
                    delta_nabla_b, delta_nabla_w = self.Backpropagation(x, y)

                    nabla_b = [(delta_nabla_b_row + nabla_b_row) for delta_nabla_b_row, nabla_b_row in zip(delta_nabla_b, nabla_b)]
                    nabla_w = [(delta_nabla_w_row + nabla_w_row) for delta_nabla_w_row, nabla_w_row in zip(delta_nabla_w, nabla_w)]
                    
                    miniBatchSize = miniBatchSize + 1

                self.weights = [w_layer-(learningRate/miniBatchSize)*nw_layer for w_layer, nw_layer in zip(self.weights, nabla_w)]
                self.biases = [b_layer -(learningRate/miniBatchSize)*nb_layer for b_layer, nb_layer in zip(self.biases, nabla_b)]
                
            print ("Training Epoch {0}: {1} / {2}".format(i, self.evaluate(trainingData), trainDataSize))

    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [numpy.zeros(b.shape) for b in self.biases]
        nabla_w = [numpy.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]


    def Backpropagation(self, x, y):
        #Change in Biases
        nabla_b = [numpy.zeros(b.shape) for b in self.biases]

        #Change in Weights
        nabla_w = [numpy.zeros(w.shape) for w in self.weights]

        activations = []
        activations.append(x)
        zs = []

        for index, (weights, bias) in enumerate(zip(self.weights, self.biases)):
            individualWeightedSum = numpy.dot(weights, activations[index]) + bias
            zs.append(individualWeightedSum)
            activation = Sigmoid(individualWeightedSum)
            activations.append(activation)

        sigmodPrime = SigmoidPrime(zs[-1])
        delta = self.CostDerivative(activations[-1], y) * sigmodPrime 

        nabla_b[-1] = delta
        nabla_w[-1] = numpy.dot(delta, activations[-2].transpose())

        for l in range(2, len(self.weights)):
            z = zs[-l]
            sp = SigmodPrime(z)
            delta = numpy.dot(self.weights[-l+1], delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = numpy.dot(delta, activations[-l].transpose())

        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        test_results = [(numpy.argmax(self.feedforward(x)), numpy.argmax(y)) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def CostDerivative(self, output_activations, y):
        return (output_activations-y)


In [4]:
def LoadData(directory):
    trainingDataFilePath = "train-images-idx3-ubyte.gz"
    trainingLabelsFilePath = "train-labels-idx1-ubyte.gz"
    
    testingDataFilePath = "t10k-images-idx3-ubyte.gz"
    testingLabelFilePath = "t10k-labels-idx1-ubyte.gz"
    
    trainingDataFilePath = os.path.join(directory, trainingDataFilePath)
    trainingLabelsFilePath = os.path.join(directory, trainingLabelsFilePath)

    testingDataFilePath = os.path.join(directory, testingDataFilePath)
    testingLabelFilePath = os.path.join(directory, testingLabelFilePath)
    
    trainingData = LoadImagesAndLables(trainingDataFilePath, trainingLabelsFilePath)
    testingData = LoadImagesAndLables(testingDataFilePath, testingLabelFilePath)
    
    return trainingData, testingData

def LoadImagesAndLables(imageFilePath, labelFilePath):
    images = LoadImages(imageFilePath)
    labels = LoadLabels(labelFilePath)
    return zip(images, labels)

def LoadLabels(filePath):
    labels = gzip.open(filePath,'r')
    labels.read(8)
    
    labelVectors = []
    for chunk in iter(lambda: labels.read(1), b''):
        label = int.from_bytes(chunk, byteorder='big')
        labelVector = numpy.zeros((10,1))
        labelVector[label] = 1.0
        labelVectors.append(labelVector)  
        
    return labelVectors
    
def LoadImages(filePath):
    images = gzip.open(filePath,'r')
    images.read(8)
    
    #Get Image Height and Width
    imageHeight = int.from_bytes(images.read(4), byteorder='big')
    imageWidth = int.from_bytes(images.read(4), byteorder='big')
    
    imageVectors = []
    for chunk in iter(lambda: images.read(imageHeight*imageWidth), b''):
        image = numpy.frombuffer(chunk, dtype=numpy.uint8).astype(numpy.float32)
        image = image[:,None]
        imageVectors.append(image)
        
    return imageVectors

In [5]:
# image = numpy.asarray(datas[index]).squeeze()
# plt.imshow(image)

In [6]:
#Get Training and Testing Data
trainingData, testingData = LoadData("Data")

trainingData = list(trainingData)
testingData = list(testingData)

network = Network([784,200, 10])

network.Train(trainingData, 10, 128, 5, testingData)

  after removing the cwd from sys.path.


Training Epoch 0: 28220 / 60000
Training Epoch 1: 32915 / 60000
Training Epoch 2: 36664 / 60000
Training Epoch 3: 37641 / 60000
Training Epoch 4: 37854 / 60000
Training Epoch 5: 38312 / 60000
Training Epoch 6: 38439 / 60000
Training Epoch 7: 38505 / 60000
Training Epoch 8: 38668 / 60000
Training Epoch 9: 38741 / 60000
