# INM702 Neural Network for MNIST

In [12]:
import pandas as pd,numpy as np,matplotlib.pyplot as plt, os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
%matplotlib inline

## Mathematical Functions and Derivatives

In [63]:
#Activation Functions
import numpy as np
#Input and Hidden Layers
@np.vectorize
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

@np.vectorize
def d_sigmoid(x):
    s = sigmoid(x)
    return s * (1 - s)

@np.vectorize
def relu(x):
    return x * (x > 0)

@np.vectorize
def d_relu(x):
    return 1 * (x > 0)

#Output Layer
def softmax(x):
    e = np.exp(x)
    return e / e.sum()
    
#Loss Functions
@np.vectorize
def cross_entropy_loss(y_pred, y):
    if y == 1:
        return -np.log(y_pred)
    else:
        return -np.log(1 - y_pred)

#d_loss = dL/dsoftmax * dsoftmax/dz
@np.vectorize
def d_loss(y_pred, y):
    return y_pred - y
    

#Truncated Normal Distribution for Initialising weights
from scipy.stats import truncnorm
def trunc_norm(mean = 0, sd = 1, lower = -1, upper = 1):
    return truncnorm((lower - mean) / sd, (upper - mean) / sd, loc=mean, scale=sd)
    

## Neural Network Class

In [146]:
class NeuralNetwork:
    def __init__(self, hidden_layer_sizes = [128,128], learning_rate = 0.001 , activation_fn = 'r', dropout_rate = 0):
        #list for number of units in each layer, 28*28 inputs, 10 outputs
        self.layer_sizes = [28*28] + hidden_layer_sizes + [10]
        self.lr = learning_rate
        self.dr = dropout_rate
        
        #activation_fn parameter takes 'r'(default) or 's' to choose ReLu or Sigmoid for the activation function
        if activation_fn == 's':
            self.activation = sigmoid
            self.d_activation = d_sigmoid
        else:
            self.activation = relu
            self.d_activation = d_relu
            
        self.initialise_weights()
    
    def initialise_weights(self):
        self.weights = list()
        for i in range(len(self.layer_sizes) - 1):
            #use truncated normal distribution to intialise weights
            trunc = 1 / np.sqrt(self.layer_sizes[i])
            X = trunc_norm(mean = 0, sd = 1, lower = -trunc, upper = trunc)
            w = X.rvs((self.layer_sizes[i+1], self.layer_sizes[i]))
            self.weights.append(w)
    
    def forward_pass(self, x):
        x = np.array(x).T
        #input to hidden layer
        self.z = [np.dot(self.weights[0], x)]
        self.a = [self.activation(self.z[0])]
        
        #hidden layers
        for i in range(len(self.weights) - 2):
            self.z.append(np.dot(self.weights[1 + i], self.a[i]))
            self.a.append(self.activation(self.z[1 + i]))
            
        #hidden to output layer
        self.z.append(np.dot(self.weights[-1], self.a[-1]))
        self.a.append(softmax(self.z[-1]))
    
    def back_prop(self, x, y):
        x = np.array(x).T
        print(x.shape)
        y = np.array(y).T
        
        weights_update = list()
        
        #start backpropogation process
        #output to hidden layer
        temp_dz = d_loss(self.a[-1], y)
        self.dw = [np.dot(temp_dz, self.a[-2].T)]
        
        #hidden layers
        for n in range(len(self.weights)-2):
            temp_dz = np.dot(self.weights[-1 - n].T, temp_dz) * self.d_activation(self.z[-2-n])
            self.dw.insert(0, np.dot(temp_dz, self.a[-3-n].T))
        
        #hidden to input layer
        temp_dz = np.dot(self.weights[-2].T, temp_dz) * self.d_activation(self.z[-3])
        self.dw.insert(0, np.dot(temp_dz, x.T))
        
        #update weights
        for i in range(len(self.weights)):
            self.weights[i] -= self.lr * self.dw[i]

    def train(self, x_train, y_train, epochs = 5):
        y = np.array(y_train).T
        
        for i in range(epochs):
            self.forward_pass(x_train)
            print("Epoch no. {}\nLoss: {}".format(i+1, cross_entropy_loss(self.a[-1], y).sum()))
            self.back_prop(x_train, y_train)
        return cross_entropy_loss(self.a[-1], y)
    
    def test(self, x_test):
        pass
            
        

## Load and Preprocess MNIST dataset

In [80]:
#Load train and test csv
input_folder_path = "MNIST_data/"

train_df = pd.read_csv(input_folder_path + "mnist_train.csv")
test_df = pd.read_csv(input_folder_path + "mnist_test.csv")

#First column is the target/label
train_labels = train_df['label'].values
test_labels = test_df['label'].values

#Pixels values start from the 2nd column
train_images = (train_df.iloc[:,1:].values).astype('float32')
test_images = (test_df.iloc[:,1:].values).astype('float32')

#Normalise
train_images /= 255
test_images /=255

#One Hot Encoding
from tensorflow.keras.utils import to_categorical
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

## Create NN instance

In [149]:
simple_network = NeuralNetwork(hidden_layer_sizes = [100,100])


simple_network.train(train_images[:8],train_labels[:8],epochs = 100)




Epoch no. 1
Loss: 35.964962414301866
(784, 8)
Epoch no. 2
Loss: 35.948424148501196
(784, 8)
Epoch no. 3
Loss: 35.9323532069683
(784, 8)
Epoch no. 4
Loss: 35.916358770626445
(784, 8)
Epoch no. 5
Loss: 35.90051503606526
(784, 8)
Epoch no. 6
Loss: 35.88488657297006
(784, 8)
Epoch no. 7
Loss: 35.869487145215324
(784, 8)
Epoch no. 8
Loss: 35.853825591360234
(784, 8)
Epoch no. 9
Loss: 35.83814618547573
(784, 8)
Epoch no. 10
Loss: 35.822376934222135
(784, 8)
Epoch no. 11
Loss: 35.80689891702256
(784, 8)
Epoch no. 12
Loss: 35.79147955081553
(784, 8)
Epoch no. 13
Loss: 35.776455727966145
(784, 8)
Epoch no. 14
Loss: 35.76185297748944
(784, 8)
Epoch no. 15
Loss: 35.747413551193624
(784, 8)
Epoch no. 16
Loss: 35.73282049627247
(784, 8)
Epoch no. 17
Loss: 35.7182926844813
(784, 8)
Epoch no. 18
Loss: 35.70385664805606
(784, 8)
Epoch no. 19
Loss: 35.689414738506805
(784, 8)
Epoch no. 20
Loss: 35.6750145896844
(784, 8)
Epoch no. 21
Loss: 35.66077415736997
(784, 8)
Epoch no. 22
Loss: 35.646299616459395

array([[0.01161728, 4.26861096, 0.01106561, 0.01060418, 0.01082082,
        0.01128587, 0.01097433, 0.01215565],
       [0.01493326, 0.01380981, 0.01285385, 4.15055918, 0.01662672,
        0.01716509, 4.10567276, 0.01668178],
       [0.01475302, 0.01354911, 0.01268136, 0.01441009, 0.01512963,
        3.96418675, 0.01498388, 0.01647968],
       [0.0128215 , 0.01276245, 0.01232179, 0.01168498, 0.0125913 ,
        0.0139657 , 0.01230306, 4.11210081],
       [0.01142222, 0.01125207, 4.33153917, 0.01063079, 0.01140023,
        0.01166598, 0.01077118, 0.01177489],
       [4.23060743, 0.01197221, 0.01183013, 0.01215623, 0.01241553,
        0.01267225, 0.01220881, 0.01351783],
       [0.01139758, 0.01049485, 0.01090614, 0.01119191, 0.01046053,
        0.01084693, 0.01149986, 0.01083108],
       [0.0105154 , 0.01055846, 0.01171071, 0.01078201, 0.01088384,
        0.01075648, 0.01059507, 0.01107097],
       [0.01150816, 0.01163546, 0.01104873, 0.0111883 , 0.01190894,
        0.01205729, 0.011698