In [152]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Data Import and Prep ##

In [153]:
data = pd.read_csv('train.csv')
data = np.array(data)
m,n = data.shape
np.random.shuffle(data)

# Split the data to train and test
# Data is divided by 255 to normalize (range 0,1)
train_data = data[:33600, 1:].transpose() / 255
train_labels = data[:33600 ,0]
train_m = 33600

test_data = data[33601:, 1:].transpose()
test_labels = data[33601:, 0]

## Neural Network

In [None]:
def Relu(matrix):
    return np.maximum(matrix, 0)

def Deriv_Relu(matrix):
    return matrix > 0

# This one causes errors
# def Softmax(logits):
#     exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))  # Numerical stability improvement
#     return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

def Softmax(logits):
    logits = np.exp(logits) / sum(np.exp(logits))
    return logits

class Neural_Network:
    def __init__(self, a0, labels):
        # Weights and Biases
        self.w1 = np.random.rand(10, 784) - 0.5
        self.b1 = np.random.rand(10, 1) - 0.5
        self.w2 = np.random.rand(10, 10) - 0.5
        self.b2 = np.random.rand(10, 1) - 0.5
        
        # Others
        self.a0 = a0
        _,self.m = self.a0.shape
        self.labels = labels
        
        # one-hot encode labels
        self.y = np.zeros((self.labels.size, self.labels.max() + 1))
        self.y[np.arange(self.labels.size), self.labels] = 1
        self.y = self.y.T
        
    def forward_pass(self, x):
        # Hidden Layer
        self.z1 = self.w1.dot(x) + self.b1
        self.a1 = Relu(self.z1)
        
        # Output Layer
        self.z2 = self.w2.dot(self.a1) + self.b2
        self.a2 = Softmax(self.z2)
    
    def calc_cost(self):
        # Cost Function (Mean Square Error)
        return np.sum(np.power((self.a2 - self.y), 2) / 2) / 784
     
    def backward_pass(self):
        self.dZ2 = self.a2 - self.y
        self.dW2 = 1 / self.m * self.dZ2.dot(self.a1.T)
        
        self.db2 = 1 / self.m * np.sum(self.dZ2)
        
        # We element wise multiply the derivative of Relu as Relu is an element-wise activation function
        self.dZ1 = self.w2.T.dot(self.dZ2) * Deriv_Relu(self.z1)
        self.dW1 = 1 / self.m * self.dZ1.dot(self.a0.T)
        
        self.db1 = 1 / self.m * np.sum(self.dZ1)
        
    def grad_descent(self, LR):
        self.w1 -= LR * self.dW1
        self.w2 -= LR * self.dW2
        self.b1 -= LR * self.db1
        self.b2 -= LR * self.db2
        
    def train(self, LR, epochs):
        for i in range(epochs):
            self.forward_pass(self.a0)
            self.backward_pass()
            self.grad_descent(LR)
            
            if i % 10 == 0:
                print("Iteration: ", i)
                print(self.get_accuracy(self.labels))
                print()

    def predict(self, test_data):
        self.forward_pass(test_data)

    def get_accuracy(self, labels):
        predictions = np.argmax(self.a2, 0)
        return np.sum(predictions == labels) / labels.size
    

In [155]:
x = Neural_Network(train_data, train_labels)

x.train(0.1, 500)

Iteration:  0
0.03857142857142857

Iteration:  10
0.1181547619047619

Iteration:  20
0.17318452380952382

Iteration:  30
0.21904761904761905

Iteration:  40
0.2538690476190476

Iteration:  50
0.28023809523809523

Iteration:  60
0.3030952380952381

Iteration:  70
0.34148809523809526

Iteration:  80
0.40461309523809524

Iteration:  90
0.4607142857142857

Iteration:  100
0.4994345238095238

Iteration:  110
0.534077380952381

Iteration:  120
0.5621428571428572

Iteration:  130
0.5848809523809524

Iteration:  140
0.6041666666666666

Iteration:  150
0.6217857142857143

Iteration:  160
0.6382440476190476

Iteration:  170
0.6524404761904762

Iteration:  180
0.6666369047619047

Iteration:  190
0.6776190476190476

Iteration:  200
0.6902678571428571

Iteration:  210
0.700595238095238

Iteration:  220
0.7104166666666667

Iteration:  230
0.7195238095238096

Iteration:  240
0.7270238095238095

Iteration:  250
0.7343452380952381

Iteration:  260
0.7404464285714286

Iteration:  270
0.7461904761904762


## Testing ##