In [4]:
#all the imports needed for the project
import struct
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

The MNIST data first needs to be read into a pandas DataFrame before continuing
with the neural network.

In [5]:

def read_idx_images(file_path):
    """Load an uncompressed IDX3 image file (the raw MNIST format) into a DataFrame.

    The file starts with a 16-byte big-endian header (magic number, image
    count, rows, columns) followed by one unsigned byte per pixel.
    The original version of this loader was generated by ChatGPT.

    Parameters
    ----------
    file_path : str or path-like
        Location of the IDX3 image file.

    Returns
    -------
    pandas.DataFrame
        One row per image and one column per pixel (rows * cols columns),
        dtype uint8. The frame holds pixel values only; no label column.

    Raises
    ------
    ValueError
        If the magic number is not that of an unsigned-byte, 3-dimensional
        IDX file, or if the pixel payload does not match the header sizes.
    """
    # 0x08 = unsigned-byte data, 0x03 = three dimensions (images, rows, cols).
    idx3_ubyte_magic = 0x00000803

    with open(file_path, 'rb') as f:
        magic, num_images, num_rows, num_cols = struct.unpack(">IIII", f.read(16))
        if magic != idx3_ubyte_magic:
            # Catches label files, gzip archives, etc. before they are
            # silently reshaped into meaningless "images".
            raise ValueError(
                f"{file_path!r} is not an IDX3 unsigned-byte image file "
                f"(magic number {magic:#010x}, expected {idx3_ubyte_magic:#010x})"
            )
        image_data = np.frombuffer(f.read(), dtype=np.uint8)

    expected_size = num_images * num_rows * num_cols
    if image_data.size != expected_size:
        raise ValueError(
            f"{file_path!r} holds {image_data.size} pixel bytes but its header "
            f"announces {num_images} images of {num_rows}x{num_cols} "
            f"({expected_size} bytes)"
        )

    # Flatten every image into a single row of rows * cols pixel values.
    images = image_data.reshape((num_images, num_rows * num_cols))
    return pd.DataFrame(images)

# Directory holding the MNIST IDX files. Set the MNIST_DATA_DIR environment
# variable to run the notebook on another machine; the default keeps the
# original location so existing setups continue to work.
data_dir = os.environ.get(
    'MNIST_DATA_DIR',
    '/Users/armaansidhu/Documents/Code/MNIST-NEURAL-NETWORK',
)
file_path = os.path.join(data_dir, 't10k-images.idx3-ubyte')
data = read_idx_images(file_path)  # DataFrame: one row per image, one column per pixel

The next steps are converting the DataFrame into a NumPy array, shuffling the data, and then splitting it into training and test sets.

In [None]:
# Convert the DataFrame returned by read_idx_images into a plain ndarray.
data = np.array(data)
m, n = data.shape  # m = number of rows (samples), n = number of columns
# Shuffle the rows in place. No seed is set in this cell, so the split below
# changes between runs unless a seed is set elsewhere.
np.random.shuffle(data)

# Hold out the first 1000 shuffled rows as the test set; the transpose puts
# one sample per column.
# NOTE(review): row 0 is used as the label vector, but read_idx_images builds
# its frame from pixel bytes only, so this looks like the first pixel rather
# than a digit label, and X ends up with n - 1 rows while init_params sizes W1
# for 784 inputs. Confirm, and load the labels from the companion label file.
data_test = data[0:1000].T
Y_test = data_test[0]
X_test = data_test[1:n]

# The remaining rows form the training set, laid out the same way.
data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]

In [None]:
def init_params(n_inputs=784, n_hidden=10, n_outputs=10):
    """Create random starting weights and biases for the two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5) using NumPy's global
    random state, so calling ``np.random.seed`` beforehand makes the result
    reproducible. The defaults reproduce the original 784-10-10 network.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features (784 = 28 * 28 pixels by default).
    n_hidden : int, optional
        Number of neurons in the hidden layer.
    n_outputs : int, optional
        Number of output neurons (one per class).

    Returns
    -------
    tuple of numpy.ndarray
        ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
    """
    # Draw order is kept as W1, b1, W2, b2 so seeded runs give the same values
    # as the fixed-size version did.
    W1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5
    return W1, b1, W2, b2

def ReLu(Z):
    """Rectified linear unit, applied element-wise.

    Entries of ``Z`` below zero are replaced by 0; all other entries pass
    through unchanged. This non-linearity lets the network model non-linear
    relationships and reduces the vanishing-gradient problem.
    """
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Turn each column of scores into a probability distribution.

    The exponentials are normalised along axis 0, so for a 2-D input every
    column (one sample) sums to 1; the digit with the highest probability is
    the prediction.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (and the output from becoming NaN) when the scores are large.

    Parameters
    ----------
    Z : array_like
        Scores laid out with classes along axis 0.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``Z`` holding the probabilities.
    """
    Z = np.asarray(Z)
    if not np.issubdtype(Z.dtype, np.floating):
        # Unsigned/integer inputs would wrap around or truncate in the shift below.
        Z = Z.astype(float)

    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    A = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    return A

def forward_propagation(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    Returns ``(Z1, A1, Z2, A2)``: the hidden layer's linear output, its ReLu
    activation, the output layer's linear output and the softmax
    probabilities, in that order.
    """
    hidden_linear = np.dot(W1, X) + b1                    # hidden layer: W1·X + b1
    hidden_activation = ReLu(hidden_linear)               # non-linearity
    output_linear = np.dot(W2, hidden_activation) + b2    # output layer: W2·A1 + b2
    output_probs = softmax(output_linear)                 # per-class probabilities
    return hidden_linear, hidden_activation, output_linear, output_probs


def ReLu_deriv(Z):
    """Derivative of ReLu, used during back propagation.

    Returns a boolean mask that is True where ``Z`` is strictly positive and
    False elsewhere; it acts as 1/0 when multiplied with numeric arrays.
    """
    positive_mask = Z > 0
    return positive_mask

def one_hot(Y, num_classes=None):
    """One-hot encode a vector of integer class labels.

    Parameters
    ----------
    Y : array_like of int
        1-D collection of class indices, one per sample.
    num_classes : int, optional
        Number of rows in the result. Defaults to ``Y.max() + 1``; pass it
        explicitly (e.g. 10 for digits) when a batch may not contain the
        highest class.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, Y.size)``: column ``i`` is all
        zeros except for a 1 in row ``Y[i]``.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        # int() avoids wrap-around when the labels are stored as uint8.
        num_classes = int(Y.max()) + 1

    one_hot_Y = np.zeros((Y.size, num_classes))      # one row per sample for now
    one_hot_Y[np.arange(Y.size), Y] = 1              # mark each sample's class (np.arange, not np.arrange)
    one_hot_Y = one_hot_Y.T                          # classes along rows, samples along columns
    return one_hot_Y

def backward_propagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Backward pass: gradients of the loss w.r.t. the weights and biases.

    Gradients are averaged over the samples that were actually passed in
    (``Y.size``) rather than over a notebook-level global, so the function
    gives correct results for any batch. Bias gradients are summed per neuron
    (along the sample axis), so they keep the column-vector shape of the
    biases they update.

    Parameters
    ----------
    Z1, A1, Z2, A2 : numpy.ndarray
        Values returned by ``forward_propagation`` for ``X``.
    W1, W2 : numpy.ndarray
        Current weight matrices (``W1`` is accepted for symmetry; only ``W2``
        is needed by the computation).
    X : numpy.ndarray
        Inputs with one sample per column.
    Y : numpy.ndarray
        Integer class labels, one per sample.

    Returns
    -------
    tuple of numpy.ndarray
        ``(dW1, db1, dW2, db2)``; ``dW*`` match the shapes of ``W*`` and
        ``db*`` are column vectors with one entry per neuron.
    """
    num_samples = Y.size

    one_hot_Y = one_hot(Y)                                    # labels as a one-hot matrix
    dZ2 = A2 - one_hot_Y                                      # output-layer error
    dW2 = dZ2.dot(A1.T) / num_samples                         # weight gradient, second layer
    db2 = np.sum(dZ2, axis=1, keepdims=True) / num_samples    # per-neuron bias gradient
    dZ1 = W2.T.dot(dZ2) * ReLu_deriv(Z1)                      # error propagated into the hidden layer
    dW1 = dZ1.dot(X.T) / num_samples                          # weight gradient, first layer
    db1 = np.sum(dZ1, axis=1, keepdims=True) / num_samples    # per-neuron bias gradient
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step to every weight and bias.

    Each parameter is moved against its gradient by the learning rate
    ``alpha``; the updated ``(W1, b1, W2, b2)`` are returned as new objects.
    """
    parameters = (W1, b1, W2, b2)
    gradients = (dW1, db1, dW2, db2)
    return tuple(
        param - alpha * grad
        for param, grad in zip(parameters, gradients)
    )