In [None]:
# importing the libraries
import numpy as np
import pandas as pd 
import pandas as pd

In [None]:
# defining the layer class
class Layer:
    """Parameters of one network layer plus the values cached while training.

    Attributes:
        weights     -- (row, col) matrix drawn from a standard normal.
        biases      -- vector of `row` zeros.
        outputs     -- activations of this layer from the latest forward
                       pass (starts as an empty list).
        errorWRTnet -- derivative of the error with respect to this layer's
                       net input from the latest backward pass (starts as
                       an empty list).
    """

    def __init__(self, row, col):
        # parameters first; this draws from NumPy's global random state
        self.init(row, col)
        # cache of the derivative with respect to the net input
        self.errorWRTnet = []
        # cache of the outputs after activation
        self.outputs = []

    def init(self, row, col):
        """(Re)create the parameters: random `weights`, zero `biases`."""
        self.biases = np.zeros(row)
        self.weights = np.random.randn(row, col)

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Training is per-sample gradient descent on the squared error between
    the network output and the target vector.

    Typical use::

        network = NeuralNetwork([784, 50, 10])
        network.initLayers()
        network.setLearningRate(.01)
        network.trainModel(inputs, targets)
    """

    def __init__(self, sizes, learningR=0.01):
        """Store the architecture; call initLayers() to create parameters.

        sizes     -- units per layer; sizes[0] is the input width and
                     sizes[-1] the output width.
        learningR -- gradient-descent step size. Having a default means
                     updateWeights() no longer fails with AttributeError
                     when setLearningRate() was never called.
        """
        self.sizes = sizes
        self.layers = []
        self.learningR = learningR

    def initLayers(self):
        """Create one Layer per entry of `sizes` with fresh parameters.

        The list is rebuilt from scratch, so calling this again
        re-initialises the network instead of appending duplicate layers.
        """
        # The input layer needs no weights or biases; it gets a Layer anyway
        # so that layers[i] lines up with sizes[i] and can cache the inputs.
        layers = [Layer(self.sizes[0], 1)]
        for i in range(1, len(self.sizes)):
            layers.append(Layer(self.sizes[i], self.sizes[i - 1]))
        self.layers = layers

    def setLearningRate(self, learningR):
        """Set the step size used by updateWeights()."""
        self.learningR = learningR

    def feedForward(self, inputs):
        """Propagate `inputs` through every layer and return the final output.

        Each layer's activations are cached in `layer.outputs` (the raw
        inputs are cached on layers[0]) for use by backPropagation().
        """
        outputs = inputs
        self.layers[0].outputs = outputs
        for i in range(1, len(self.sizes)):
            layer = self.layers[i]
            # net input = W.x + b (the biases were previously left out)
            net = np.dot(outputs, layer.weights.T) + layer.biases
            outputs = self.activation(net)
            layer.outputs = outputs
        return outputs

    def activation(self, outputs):
        """Element-wise sigmoid; input is clipped to [-500, 500] first so
        np.exp cannot overflow."""
        outputs = np.clip(outputs, -500, 500)
        return 1/(1 + np.exp(-outputs))

    def calculateError(self, outputs, targets):
        """Return the mean squared error between `outputs` and `targets`."""
        return np.square(np.subtract(targets, outputs)).mean()

    # Original (misspelled) name, kept so existing callers continue to work.
    claculateError = calculateError

    def backPropagation(self, outputs, targets):
        """Compute the weight gradients for one sample.

        Must be called after feedForward() for the same sample, because it
        reads the activations cached on each layer. `outputs` and `targets`
        are expected to be 1-D vectors (a single sample, not a batch).

        Returns a list with one entry per layer: index 0 is None (the input
        layer has nothing to train), index i holds an array shaped like
        layers[i].weights. The derivative with respect to each layer's net
        input is stored in `layer.errorWRTnet`; it is also the bias gradient
        used by updateWeights().
        """
        last = len(self.sizes) - 1
        # array to store gradients for each layer
        gradients = [None] * len(self.sizes)

        for i in range(last, 0, -1):
            layer = self.layers[i]
            if i == last:
                # output layer: derivative of 1/2 * (output - target)^2
                errorWRToutput = np.subtract(outputs, targets)
                activations = outputs
            else:
                # hidden layer: pull the error back through the next
                # layer's (not yet updated) weights
                following = self.layers[i + 1]
                errorWRToutput = np.dot(following.errorWRTnet, following.weights)
                activations = layer.outputs

            # sigmoid derivative expressed through its own output
            outWRTnet = np.multiply(activations, 1 - activations)
            errorWRTnet = np.multiply(errorWRToutput, outWRTnet)
            layer.errorWRTnet = errorWRTnet

            # d(net)/d(weight) is the previous layer's output, so the weight
            # gradient is the outer product delta x previous activations
            gradients[i] = np.outer(errorWRTnet, self.layers[i - 1].outputs)
        return gradients

    def updateWeights(self, gradients):
        """Apply one gradient-descent step to every trainable layer.

        Weights move along `gradients[i]`. Biases move along the
        `errorWRTnet` cached by the latest backPropagation() call; the bias
        step is skipped when no matching cached value exists (e.g. when
        backPropagation() has not been run yet).
        """
        for i in range(1, len(self.sizes)):
            layer = self.layers[i]
            layer.weights = layer.weights - self.learningR * gradients[i]
            biasGradient = np.asarray(layer.errorWRTnet)
            if biasGradient.shape == layer.biases.shape:
                layer.biases = layer.biases - self.learningR * biasGradient

    def trainModel(self, inputs, targets, epochs=5):
        """Train with one update per sample.

        inputs, targets -- parallel iterables of samples and target vectors.
        epochs          -- number of full passes over the data (default 5,
                           the value that used to be hard-coded).
        """
        for _ in range(epochs):
            # `sample` rather than `input`, so the builtin is not shadowed
            for sample, target in zip(inputs, targets):
                output = self.feedForward(sample)
                gradients = self.backPropagation(output, target)
                self.updateWeights(gradients)

    def predict(self, inputs):
        """Return the network output for `inputs` (a plain forward pass)."""
        return self.feedForward(inputs)

In [None]:
# read the training dataset from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='./dataset/mnist_train.csv')

In [None]:
# separating features and labels: column 0 is taken as the label,
# every remaining column as a feature
# NOTE(review): the feature values go to the network unscaled; presumably
# they are raw 0-255 pixel intensities -- confirm, and consider scaling the
# training and test inputs the same way.
inputs = df.iloc[:, 1:].to_numpy()
labels = df.iloc[:, 0].to_numpy()

# one-hot encoding: row k gets a 1 in column labels[k], zeros elsewhere
rows = np.arange(len(labels))
oneHots = np.zeros((len(labels), 10))
oneHots[rows, labels] = 1

In [None]:
# main part of building the model
# Seed NumPy's global random state first: the layer weights are drawn from
# it, so without a seed every Restart & Run All gives a different model.
np.random.seed(42)

# input width 784, four hidden layers of 50 units, 10 outputs
# (10 matches the width of the one-hot targets)
network = NeuralNetwork([784, 50, 50, 50, 50, 10])
network.initLayers()
network.setLearningRate(.01)
network.trainModel(inputs, oneHots)

In [None]:
# second training run on the same data; it continues from the current
# weights, so every execution of this cell trains the model further
network.trainModel(inputs=inputs, targets=oneHots)

In [None]:
# printing the mean squared error of the model on the first 100 training samples
# (the loop variable is `sample`, not `input`, so the builtin input() is not
# overwritten in the notebook's global namespace)
for sample, label in zip(inputs[:100], oneHots[:100]):
    output = network.feedForward(sample)
    print(network.claculateError(output, label))

In [None]:
# read the test dataset from CSV into its own DataFrame
df2 = pd.read_csv(filepath_or_buffer='./dataset/mnist_test.csv')

In [None]:
# separating features and labels, keeping only the first 100 test rows
# (column 0 is taken as the label, the remaining columns as features)
inputs2 = df2.iloc[:100, 1:].to_numpy()
labels2 = df2.iloc[:100, 0].to_numpy()

In [None]:
# testing the model: the predicted class is the index of the largest output
# (the loop variable is `sample`, not `input`, so the builtin input() is not
# overwritten in the notebook's global namespace)
for sample, label in zip(inputs2, labels2):
    output = network.feedForward(sample)
    print('label:', label, 'result:', np.argmax(output))
