# INM702 Neural Network for MNIST

In [12]:
import pandas as pd,numpy as np,matplotlib.pyplot as plt, os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
%matplotlib inline

## Mathematical Functions and Derivatives

In [169]:
#Activation Functions
import numpy as np
#Input and Hidden Layers
@np.vectorize
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

@np.vectorize
def d_sigmoid(x):
    s = sigmoid(x)
    return s * (1 - s)

@np.vectorize
def relu(x):
    return x * (x > 0)

@np.vectorize
def d_relu(x):
    return 1 * (x > 0)

#Output Layer
#subtract max value of input vector to prevent overflow
def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()
    
#Loss Functions
#clip values to prevent divide by zero error
@np.vectorize
def cross_entropy_loss(y_pred, y):
    eps = 1e-12
    y_pred = np.clip(y_pred, eps, 1 - eps)
    if y == 1:
        return -np.log(y_pred)
    else:
        return -np.log(1 - y_pred)

#d_loss = dL/dsoftmax * dsoftmax/dz
@np.vectorize
def d_loss(y_pred, y):
    return y_pred - y
    

#Truncated Normal Distribution for Initialising weights
from scipy.stats import truncnorm
def trunc_norm(mean = 0, sd = 1, lower = -1, upper = 1):
    return truncnorm((lower - mean) / sd, (upper - mean) / sd, loc=mean, scale=sd)
    

## Neural Network Class

In [178]:
class NeuralNetwork:
    def __init__(self, hidden_layer_sizes = [128,128], learning_rate = 0.001 , activation_fn = 'r', dropout_rate = 0):
        #list for number of units in each layer, 28*28 inputs, 10 outputs
        self.layer_sizes = [28*28] + hidden_layer_sizes + [10]
        self.lr = learning_rate
        self.dr = dropout_rate
        
        #activation_fn parameter takes 'r'(default) or 's' to choose ReLu or Sigmoid for the activation function
        if activation_fn == 's':
            self.activation = sigmoid
            self.d_activation = d_sigmoid
        else:
            self.activation = relu
            self.d_activation = d_relu
            
        self.initialise_weights()
    
    def initialise_weights(self):
        self.weights = list()
        for i in range(len(self.layer_sizes) - 1):
            #use truncated normal distribution to intialise weights
            trunc = 1 / np.sqrt(self.layer_sizes[i])
            X = trunc_norm(mean = 0, sd = 1, lower = -trunc, upper = trunc)
            w = X.rvs((self.layer_sizes[i+1], self.layer_sizes[i]))
            self.weights.append(w)
    
    def forward_pass(self, x):
        x = np.array(x).T
        #input to hidden layer
        self.z = [np.dot(self.weights[0], x)]
        self.a = [self.activation(self.z[0])]
        
        #hidden layers
        for i in range(len(self.weights) - 2):
            self.z.append(np.dot(self.weights[1 + i], self.a[i]))
            self.a.append(self.activation(self.z[1 + i]))
            
        #hidden to output layer
        self.z.append(np.dot(self.weights[-1], self.a[-1]))
        self.a.append(softmax(self.z[-1]))
    
    def back_prop(self, x, y):
        x = np.array(x).T
        print(x.shape)
        y = np.array(y).T
        
        weights_update = list()
        
        #start backpropogation process
        #output to hidden layer
        temp_dz = d_loss(self.a[-1], y)
        self.dw = [np.dot(temp_dz, self.a[-2].T)]
        
        #hidden layers
        for n in range(len(self.weights)-2):
            temp_dz = np.dot(self.weights[-1 - n].T, temp_dz) * self.d_activation(self.z[-2-n])
            self.dw.insert(0, np.dot(temp_dz, self.a[-3-n].T))
        
        #hidden to input layer
        temp_dz = np.dot(self.weights[-2].T, temp_dz) * self.d_activation(self.z[-3])
        self.dw.insert(0, np.dot(temp_dz, x.T))
        
        #update weights
        for i in range(len(self.weights)):
            self.weights[i] -= self.lr * self.dw[i]

    def train(self, x_train, y_train, epochs = 5):
        y = np.array(y_train).T
        
        for i in range(epochs):
            self.forward_pass(x_train)
            print("Epoch no. {}\nLoss: {}".format(i+1, cross_entropy_loss(self.a[-1], y).sum()))
            self.back_prop(x_train, y_train)
        return cross_entropy_loss(self.a[-1], y)
    
    def test(self, x_test, y_test):
        y = np.array(y_test).T
        self.forward_pass(x_test)
        print(cross_entropy_loss(self.a[-1], y).sum())
        
        
            
        

## Load and Preprocess MNIST dataset

In [161]:
#Load train and test csv
input_folder_path = "MNIST_data/"

train_df = pd.read_csv(input_folder_path + "mnist_train.csv")
test_df = pd.read_csv(input_folder_path + "mnist_test.csv")

#First column is the target/label
train_labels = train_df['label'].values
test_labels = test_df['label'].values

#Pixels values start from the 2nd column
train_images = (train_df.iloc[:,1:].values).astype('float32')
test_images = (test_df.iloc[:,1:].values).astype('float32')

#Normalise
train_images /= 255
test_images /=255

#One Hot Encoding
from tensorflow.keras.utils import to_categorical
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

## Create NN instance

In [179]:
simple_network = NeuralNetwork(hidden_layer_sizes = [128,128],learning_rate = 0.01)


simple_network.train(train_images[:100],train_labels[:100],epochs = 10);
print("\nTEST")
simple_network.test(test_images[:100],test_labels[:100])




Epoch no. 1
Loss: 693.0190150311574
(784, 100)
Epoch no. 2
Loss: 689.0127781748067
(784, 100)
Epoch no. 3
Loss: 683.3705247528807
(784, 100)
Epoch no. 4
Loss: 825.4123596690041
(784, 100)
Epoch no. 5
Loss: 2691.6671086691895
(784, 100)
Epoch no. 6
Loss: 2735.4710904778267
(784, 100)
Epoch no. 7
Loss: 2790.7331548316465
(784, 100)
Epoch no. 8
Loss: 2735.4710904778267
(784, 100)
Epoch no. 9
Loss: 2790.7331548316465
(784, 100)
Epoch no. 10
Loss: 2735.4710904778267
(784, 100)

TEST
2735.4710904778267
