In [1]:
#Writing a Neural Network Class from Scratch

import numpy as np
        
class NeuralNetwork:
    """Fully connected feed-forward network with a sigmoid on every layer.

    Samples are stored column-wise: forward() multiplies each weight matrix
    of shape (neurons_out, neurons_in) with an activation matrix that holds
    one column per sample.

    Attributes:
        noflayers: number of layers, counting the input layer.
        w, b: lists of weight matrices and bias column vectors, one entry per
            pair of consecutive layers.
        a, z: activations and pre-activations cached by the last forward().
        grad_w, grad_b: gradients cached by the last backward(), ordered
            like w and b.
    """

    def __init__(self, neuronsperlayer): #Sets Up Weights and Biases of Neural Network
        """Create randomly initialised weights and zero biases.

        Args:
            neuronsperlayer: sequence of layer widths, input layer first.
        """
        self.noflayers = len(neuronsperlayer)
        #Xavier Initialisation: Zero-Mean Normal with Std sqrt(2 / (fan_in + fan_out))
        self.w = [
            np.random.normal(0.0, (2.0 / (n_in + n_out)) ** 0.5, size=(n_out, n_in))
            for n_in, n_out in zip(neuronsperlayer[:-1], neuronsperlayer[1:])
        ]
        self.b = [np.zeros((n_out, 1)) for n_out in neuronsperlayer[1:]]

    @staticmethod
    def sigmoid(arr): #Activation Function of a Neuron
        """Return 1 / (1 + exp(-arr)) element-wise without overflowing.

        Only exp(-|arr|) is evaluated, which lies in (0, 1], so very negative
        inputs no longer push np.exp to inf (and trigger a RuntimeWarning).
        For arr >= 0 the result is 1 / (1 + e); for arr < 0 it is the
        algebraically equal e / (1 + e), with e = exp(-|arr|).
        """
        arr = np.asarray(arr)
        exp_neg_abs = np.exp(-np.abs(arr))
        return np.where(arr >= 0, 1.0, exp_neg_abs) / (1.0 + exp_neg_abs)

    @staticmethod
    def derivative_sigmoid(arr): #Derivative of Activation Function
        """Return sigmoid(arr) * (1 - sigmoid(arr)) element-wise."""
        sigmoid_value = NeuralNetwork.sigmoid(arr)
        return sigmoid_value * (1 - sigmoid_value)

    def forward(self, input_layers): #Forward Pass through Neural Network
        """Propagate a batch through every layer and return the last activation.

        Caches all activations in self.a (starting with the input itself) and
        all pre-activations in self.z; backward() reads both.

        Args:
            input_layers: matrix with one column per sample.

        Returns:
            The activation matrix of the final layer.
        """
        self.a = [input_layers]
        self.z = []

        for W, b in zip(self.w, self.b):
            Z = W @ self.a[-1] + b #b is a Column Vector and Broadcasts over the Samples
            self.z.append(Z)
            self.a.append(self.sigmoid(Z))

        return self.a[-1]

    @staticmethod
    def loss(output_layers, targets): #Loss Function
        """Return half the summed squared error divided by targets.shape[1]."""
        return 0.5 * np.sum((targets - output_layers)**2) / targets.shape[1]

    def backward(self, targets): #Backpropagation of Neural Network --> Provides Gradient of Loss Function
        """Compute the gradients of loss() for the batch last given to forward().

        Must be called after forward(), because it reads self.a and self.z.
        Stores the results in self.grad_w and self.grad_b.

        Args:
            targets: matrix with the same shape as the last forward() output.
        """
        N = targets.shape[1]
        #Error Signal of the Output Layer
        delta = (self.a[-1] - targets) * self.derivative_sigmoid(self.z[-1])
        self.grad_b = [delta.sum(axis=1, keepdims=True) / N]
        self.grad_w = [delta @ self.a[-2].T / N]

        #Walk Backwards: w[i] Carries the Error from Layer i+1 Down to Layer i
        for i in range(self.noflayers-2, 0, -1):
            delta = (self.w[i].T @ delta) * self.derivative_sigmoid(self.z[i-1])
            self.grad_b.append(delta.sum(axis=1, keepdims=True) / N)
            self.grad_w.append(delta @ self.a[i-1].T / N)

        #Gradients were Collected Last Layer First --> Restore the Order of w and b
        self.grad_w.reverse()
        self.grad_b.reverse()

    def update(self, learning_rate): #Updating the Weights and Biases of Neural Network
        """Take one gradient-descent step in place using the cached gradients."""
        for i in range(self.noflayers-1):
            self.w[i] -= learning_rate * self.grad_w[i]
            self.b[i] -= learning_rate * self.grad_b[i]

    def learn(self, training_data, target, epochs, learning_rate, batch_size, report_every=500): #Stochastic Gradient Descent
        """Train with mini-batch gradient descent, reshuffling every epoch.

        Args:
            training_data: input matrix with one column per sample.
            target: target matrix with one column per sample.
            epochs: number of passes over the data.
            learning_rate: step size handed to update().
            batch_size: number of columns per mini-batch (the last batch of an
                epoch may be smaller).
            report_every: the full-data loss is printed after epoch 1 and
                after every report_every-th epoch; 0 or None disables it.

        Raises:
            ValueError: if batch_size is below 1 or the column counts of
                training_data and target differ.
        """
        N = training_data.shape[1]
        if target.shape[1] != N:
            raise ValueError("training_data and target must have the same number of columns")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        for epoch in range(1, epochs+1):
            #Fancy Indexing Returns Shuffled Copies, so the Caller's Arrays Stay Untouched
            permutation = np.random.permutation(N)
            training_data, target = training_data[:, permutation], target[:, permutation]

            for i in range(0, N, batch_size):
                self.forward(training_data[:, i:i+batch_size])
                self.backward(target[:, i:i+batch_size])
                self.update(learning_rate)

            if report_every and (epoch % report_every == 0 or epoch == 1):
                loss = self.loss(self.forward(training_data), target)
                print(f"Epoch {epoch:3d} - Loss: {loss:.4f}")


In [2]:
import numpy as np
import pickle

#Configuration --> Everything a Reader Might Want to Tune
SEED = 0 #Fixed so that Weight Initialisation and Batch Shuffling are Repeatable
DATA_PATH = "mnist.pkl"
LAYER_SIZES = [784, 128, 64, 10]
EPOCHS = 2000
LEARNING_RATE = 0.1
BATCH_SIZE = 128

#NOTE(security): pickle.load can Execute Arbitrary Code --> Only Open a Trusted mnist.pkl
#encoding="latin1" --> presumably the file was pickled under Python 2 (TODO confirm)
with open(DATA_PATH, "rb") as f: #Loading in Data
    (train_X, train_y), (valid_X, valid_y), (test_X, test_y) = pickle.load(f, encoding="latin1")

#Not Using Validation Data so Stacking it with the Training Data
train_X = np.vstack([train_X, valid_X]) 
train_y = np.hstack([train_y, valid_y])

#Changing the y/Output Data as Vectors
#Ex: 3 --> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
num_classes = 10
eye = np.eye(num_classes)
train_Y_1hot = eye[train_y] #Row k of the Identity Matrix is the One-Hot Vector for Label k
test_Y_1hot  = eye[test_y]

#Final Set Up of Data
#Transposed because the Network Expects One Column per Sample
X_train = train_X.T
Y_train = train_Y_1hot.T
X_test  = test_X.T

#Sanity Checks before the Long Training Run
assert X_train.shape[0] == LAYER_SIZES[0], "input rows must match the first layer size"
assert Y_train.shape == (LAYER_SIZES[-1], X_train.shape[1]), "targets must be (classes, samples)"
assert X_test.shape[0] == LAYER_SIZES[0], "test rows must match the first layer size"

#Generating Neural Network and Making it Learn
np.random.seed(SEED) #NeuralNetwork Draws from the Global NumPy Random State
NN = NeuralNetwork(neuronsperlayer=LAYER_SIZES)
NN.learn(X_train, Y_train, epochs=EPOCHS, learning_rate=LEARNING_RATE, batch_size=BATCH_SIZE)


Epoch   1 - Loss: 0.4452
Epoch 500 - Loss: 0.0145
Epoch 1000 - Loss: 0.0062
Epoch 1500 - Loss: 0.0035
Epoch 2000 - Loss: 0.0025


In [3]:
#Testing the Network for its Accuracy
logits = NN.forward(X_test) #Output Activations of the Network, One Column per Test Sample
preds = np.argmax(logits, axis=0) #Predicted Class = Row Index of the Largest Output
acc = np.mean(preds == test_y) #Fraction of Predictions that Equal the Labels

print(f"\nTest Accuracy: {acc*100:.2f}%")



Test Accuracy: 98.09%
