In this notebook, I'm going to attempt to build a neural network from scratch, without using PyTorch or TensorFlow. This project is heavily inspired by these two videos:
1. https://www.youtube.com/watch?v=w8yWXqWQYmU
2. https://www.youtube.com/watch?v=cAkMcPfY_Ns

This is going to be a very simple model. I'm using the MNIST database to train this network on handwritten variations of numbers, so that it can identify them correctly (hopefully).
This is going to be a simple network with basically 3 layers: an input layer and 2 hidden layers leading to an output. The first hidden layer will use a ReLU activation function and the second layer will implement a simple softmax activation function (similar to the first link).

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load both CSV exports from the local data/ directory into DataFrames
# (test set first, then the training set).
test_data, train_data = map(pd.read_csv, ('data/test.csv', 'data/train.csv'))

In [8]:
# Turn the training DataFrame into a plain NumPy array.
# m = number of rows, n = number of columns.
train_data_numpy = np.array(train_data)
m, n = train_data_numpy.shape

# Transpose so every column is one row of the CSV: row 0 becomes the labels
# and the remaining n - 1 rows become the inputs.
data_train = train_data_numpy.T
Y_train = data_train[0]
# Scale the inputs by 1/255 (presumably 8-bit pixel intensities, so the
# result lies in [0, 1] -- confirm against the CSV).
X_train = data_train[1:n] / 255.
_, m_train = X_train.shape

In [9]:
Y_train

array([1, 0, 1, ..., 7, 6, 9], dtype=int64)

In [None]:
# Now this is the real stuff, moving onto the fundamentals of the network

# Initial parameters (weights and biases) that training will adjust through backpropagation
def init_params():
    """Create the initial weights and biases of the two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5), so the starting values
    are small and centred on zero.

    Returns:
        Tuple ``(W1, b1, W2, b2)``:
            W1: (10, 784) weights from the 784 input values to 10 hidden units.
            b1: (10, 1) biases of the first (ReLU) layer.
            W2: (10, 10) weights from the 10 hidden units to the 10 outputs.
            b2: (10, 1) biases of the second (softmax) layer.
    """
    # rand() is uniform on [0, 1); subtracting 0.5 centres it on zero.
    # randn() is already zero-mean, so shifting it would push every
    # parameter towards -0.5 and bias the network from the start.
    W1 = np.random.rand(10, 784) - 0.5  # 784 inputs per image (28 x 28 MNIST pixels)
    b1 = np.random.rand(10, 1) - 0.5    # one bias per hidden unit
    W2 = np.random.rand(10, 10) - 0.5   # 10 output units, one per digit 0-9
    b2 = np.random.rand(10, 1) - 0.5    # one bias per output unit
    return W1, b1, W2, b2

# Rectified Linear Unit
def ReLU(x):
    """Apply the rectified linear unit element-wise: ``max(x, 0)``.

    Args:
        x: NumPy array (or scalar) of pre-activation values.

    Returns:
        A new array with the same shape as ``x`` in which every negative
        entry is replaced by 0. The input is not modified.
    """
    # Vectorised maximum works for any array shape and avoids a Python loop.
    return np.maximum(x, 0)
# Softmax activation function
def softmax(x):
    """Compute the softmax along axis 0.

    For a 2-D input each column is normalised independently, so every column
    of the result sums to 1. A 1-D input is treated as a single vector.

    Args:
        x: NumPy array of scores (logits).

    Returns:
        Array of the same shape as ``x`` holding the softmax probabilities.
    """
    # Subtracting the per-column maximum leaves the result unchanged
    # mathematically but keeps exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def encode_answers(x, num_classes=None):
    """One-hot encode integer class labels.

    Args:
        x: 1-D array of non-negative integer labels.
        num_classes: Number of rows in the encoding. Defaults to
            ``x.max() + 1``; pass it explicitly when a batch might not
            contain the largest label.

    Returns:
        Array of shape ``(num_classes, x.size)`` in which column ``i`` has a
        1 in row ``x[i]`` and 0 everywhere else.
    """
    labels = np.asarray(x)
    if num_classes is None:
        num_classes = labels.max() + 1
    enc = np.zeros((labels.size, num_classes))
    # Fancy indexing sets exactly one entry per sample: (row i, column labels[i]).
    enc[np.arange(labels.size), labels] = 1
    # Transpose so that each column corresponds to one sample.
    return enc.T

def forward(W1, b1, W2, b2, x):
    """Run one forward pass through both layers.

    Args:
        W1, b1: Weights and biases of the first (ReLU) layer.
        W2, b2: Weights and biases of the second (softmax) layer.
        x: Input array with one sample per column.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the pre-activation and activation of the
        first layer followed by those of the second layer. ``A2`` is the
        network's output.
    """
    Z1 = W1.dot(x) + b1
    A1 = ReLU(Z1)           # First hidden layer
    # The second layer consumes the first layer's activations, not the raw input.
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)        # Second hidden layer

    return Z1, A1, Z2, A2

def backward(Z1, A1, Z2, A2, W1, W2, x, Y):
    """Compute the gradients of the loss for one batch.

    Args:
        Z1, A1: Pre-activation and activation of the first layer.
        Z2, A2: Pre-activation and activation of the second layer.
        W1, W2: Layer weights (only ``W2`` is needed for the gradients).
        x: Input array with one sample per column.
        Y: 1-D array of integer labels, one per sample.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``. The bias gradients keep one entry
        per unit (a column vector), matching the shape of the biases.
    """
    # Average over the samples actually passed in, rather than relying on a
    # notebook-level global that may describe a different data set.
    m = np.size(Y)
    encoding = encode_answers(Y)

    dZ2 = A2 - encoding
    dW2 = dZ2.dot(A1.T) / m
    # Sum across samples only (axis=1) so each unit keeps its own gradient.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # (Z1 > 0) is the derivative of ReLU: 1 where the unit was active, else 0.
    dZ1 = W2.T.dot(dZ2) * (Z1 > 0)
    dW1 = dZ1.dot(x.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    return dW1, db1, dW2, db2
 
def update_params(W1, b1, W2, b2, dW1, dW2, db1, db2, alpha):
    """Take one gradient-descent step on every parameter.

    Note the gradient argument order: both weight gradients come first
    (``dW1, dW2``), then both bias gradients (``db1, db2``). This differs
    from the ``dW1, db1, dW2, db2`` order returned by ``backward``.

    Args:
        W1, b1, W2, b2: Current weights and biases.
        dW1, dW2, db1, db2: Gradients for the matching parameters.
        alpha: Learning rate.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters.
    """
    def step(param, grad):
        # Move against the gradient, scaled by the learning rate.
        return param - alpha * grad

    return step(W1, dW1), step(b1, db1), step(W2, dW2), step(b2, db2)