In [11]:
# libraries
import pandas as pd
import numpy as np

In [12]:
# defining necessary functions

def softmax(z):
    z = np.clip(z, -100, 100)
    z = z - np.max(z)
    exps = np.exp(z)
    return exps / np.sum(exps)

def leaky_relu(x):
    for  i, n in enumerate(x):
        for k,_ in enumerate(n):
            x[i,k] = np.maximum(0.01 * x[i,k], x[i,k])
            
    return x

def D_leaky_relu(x):
    for  i, n in enumerate(x):
        for k,_ in enumerate(n):
            x[i,k] = 0.01 if x[i,k] <= 0 else 1
            
    return x

def row(m):
    m = m.reshape(-1,1)
    return m

In [13]:
# hyperparamaters, weights, biases, dataset imports

W1 = np.random.randn(16, 784) * np.sqrt(2 / ((1 + 0.01**2) * 784))
W2 = np.random.randn(16, 16) * np.sqrt(2 / ((1 + 0.01**2) * 16))
W3 = np.random.randn(10, 16) * np.sqrt(1 / (16 + 10))
b1 = np.zeros((16,1))
b2 = np.zeros((16,1))
b3 = np.zeros((10,1))

eta = 0.05

train_df = pd.read_csv("CSVs\\mnist_train.csv").sample(frac=1).reset_index(drop=True)
test_df = pd.read_csv("CSVs\\mnist_test.csv").sample(frac=1).reset_index(drop=True)
train_img = np.array(train_df.iloc[:,1:]) / 255
train_label = np.array(train_df.iloc[:,0])
test_img = np.array(test_df.iloc[:,1:]) / 255
test_label = np.array(test_df.iloc[:,0])

In [14]:
def ForwardPass(neuralInput):
    neuralInput = row(neuralInput)
    z1 = W1 @ neuralInput + b1
    a1 = leaky_relu(z1)
    z2 = W2 @ a1 + b2
    a2 = leaky_relu(z2)
    neuralOutput = softmax(W3 @ a2 + b3)
    return [neuralOutput, z2, z1]

dW3 = np.zeros(np.shape(W3))
dW2 = np.zeros(np.shape(W2))
dW1 = np.zeros(np.shape(W1))
db3 = np.zeros(np.shape(b3))
db2 = np.zeros(np.shape(b2))
db1 = np.zeros(np.shape(b1))

def backPass(img, label):
    
    global dW3,dW2,dW1,db1,db2,db3
    y = np.zeros((10,1))
    y[label] = 1
    
    NO, z2, z1 = ForwardPass(img)
    
    dW3 += (NO - y)@leaky_relu(z2).T
    db3 += (NO - y)
    da2 = row(np.sum((NO - y)*W3, axis=0))
    dW2 += da2*D_leaky_relu(z2)@leaky_relu(z1).T
    db2 += da2*D_leaky_relu(z2)
    da1 = row(np.sum(da2*D_leaky_relu(z2)*W2, axis=0))
    dW1 += (da1*D_leaky_relu(z1))@img.reshape(1, -1)
    db1 += da1*D_leaky_relu(z1)
    
def backProp(img_batch, lable_batch):
    
    global dW3,dW2,dW1,db1,db2,db3
    dW3 *= 0
    dW2 *= 0
    dW1 *= 0
    db3 *= 0
    db2 *= 0
    db1 *= 0
    
    global W1,W2,W3,b1,b2,b3
    
    for img, label in zip(img_batch, lable_batch):
        backPass(img, label)
        
    max_grad_norm = 1.0
    grads = [dW1, dW2, dW3, db1, db2, db3]
    for grad in grads:
        np.clip(grad, -max_grad_norm, max_grad_norm, out=grad)
        
    W1 -= eta * dW1 / 120
    W2 -= eta * dW2 / 120
    W3 -= eta * dW3 / 120
    b1 -= eta * db1 / 120
    b2 -= eta * db2 / 120
    b3 -= eta * db3 / 120
    

In [15]:
def epoch():
    for i in range(500):
        backProp(train_img[i*120:i*120+120,:], train_label[i*120:i*120+120])

In [16]:
correct_guesses = 0
def test_batch(imgs, labels):
    global correct_guesses
    for img,label in zip(imgs,labels):
        if np.argmax(ForwardPass(row(img))[0]) == label:
            correct_guesses += 1
            
def accuracy():
    global correct_guesses
    for i in range(50):
        test_batch(test_img[i*200:i*200+200,:],test_label[i*200:i*200+200])
        
    print("Test accuracy:",(correct_guesses/100))
    correct_guesses = 0
    for i in range(300):
        test_batch(train_img[i*200:i*200+200,:],train_label[i*200:i*200+200])
        
    print("Train accuracy:",(correct_guesses/600))
    correct_guesses = 0
    

In [17]:
for i in range(18):
    epoch()
    accuracy()

Test accuracy: 81.66
Train accuracy: 81.29833333333333
Test accuracy: 88.44
Train accuracy: 87.63166666666666
Test accuracy: 89.94
Train accuracy: 89.315
Test accuracy: 90.58
Train accuracy: 90.145
Test accuracy: 91.34
Train accuracy: 90.795
Test accuracy: 91.79
Train accuracy: 91.20333333333333
Test accuracy: 92.04
Train accuracy: 91.57333333333334
Test accuracy: 92.23
Train accuracy: 91.84833333333333
Test accuracy: 92.42
Train accuracy: 92.16166666666666
Test accuracy: 92.53
Train accuracy: 92.37666666666667
Test accuracy: 92.73
Train accuracy: 92.56333333333333
Test accuracy: 92.94
Train accuracy: 92.705
Test accuracy: 92.98
Train accuracy: 92.84333333333333
Test accuracy: 93.15
Train accuracy: 92.975
Test accuracy: 93.19
Train accuracy: 93.11
Test accuracy: 93.22
Train accuracy: 93.215
Test accuracy: 93.27
Train accuracy: 93.335
Test accuracy: 93.32
Train accuracy: 93.415


In [18]:
W1 = pd.DataFrame(W1)
b1 = pd.DataFrame(b1)
W2 = pd.DataFrame(W2)
b2 = pd.DataFrame(b2)
W3 = pd.DataFrame(W3)
b3 = pd.DataFrame(b3)

W1.to_csv("CSVs\\W1.csv", index=False, header=False)
b1.to_csv("CSVs\\b1.csv", index=False, header=False)
W2.to_csv("CSVs\\W2.csv", index=False, header=False)
b2.to_csv("CSVs\\b2.csv", index=False, header=False)
W3.to_csv("CSVs\\W3.csv", index=False, header=False)
b3.to_csv("CSVs\\b3.csv", index=False, header=False)