### **Analysing how weight initialisation affects model metrics**

#### ***Dependencies***

In [1]:
# Add the path of outer folders for easy imports
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import numpy as np
import tensorflow as tf

from src.activation_functions.ReLU import ReLU
from src.activation_functions.Softmax import Softmax
from src.layers.DenseLayer import DenseLayer
from src.NeuralNetwork import NeuralNetwork

#### ***Load Data***

In [2]:
def load_mnist():
    """Load MNIST through Keras and prepare it for a dense network.

    Returns:
        A 4-tuple ``(x_train, y_train, x_test, y_test)``. Each image array is
        reshaped to rows of 28*28 values, cast to float32 and divided by
        255.0. Each label array is one-hot encoded over 10 classes with
        ``tf.keras.utils.to_categorical``.
    """
    def _prepare_images(images):
        # Flatten every image into one row of 28*28 values, then divide by 255.0.
        return images.reshape(-1, 28*28).astype(np.float32) / 255.0

    def _prepare_labels(labels):
        # One-hot encode the integer labels over 10 classes.
        return tf.keras.utils.to_categorical(labels, 10)

    train_split, test_split = tf.keras.datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    return (
        _prepare_images(train_images),
        _prepare_labels(train_labels),
        _prepare_images(test_images),
        _prepare_labels(test_labels),
    )

In [3]:
# Fetch the four MNIST arrays once; the cells below reuse them for both networks.
x_train, y_train, x_test, y_test = load_mnist()

#### ***Create Neural Network (without any weight initialisation)***

In [4]:
# First network: same layer stack as network2 further down, except that the first
# DenseLayer is built with weight_initialisation=1 instead of the default.
# NOTE(review): what the value 1 selects is defined inside DenseLayer, not in this
# notebook — confirm it matches what this section's heading describes.
network1 = NeuralNetwork()
# NOTE(review): this ReLU is added before the first DenseLayer, and no activation is
# added between the 128- and 64-neuron layers — confirm this ordering is intended.
network1.add(ReLU())
network1.add(DenseLayer(no_inputs=28*28, no_neurons=128, weight_initialisation=1))
network1.add(DenseLayer(no_inputs=128, no_neurons=64))
network1.add(ReLU())
network1.add(DenseLayer(no_inputs=64, no_neurons=10))
network1.add(Softmax())

#### ***Train the Neural Network***

In [5]:
# Train on the training arrays; epochs, learning rate and batch size are the same
# values used for network2 so the two runs can be compared.
network1.train(x_train, y_train, epochs=5, learning_rate=0.01, batch_size=32)

  return -np.mean(np.sum(true_values * np.log(predicted_values), axis=1))
  return -np.mean(np.sum(true_values * np.log(predicted_values), axis=1))


Epoch: 1/5; Loss: 92.920118
Epoch: 2/5; Loss: nan
Epoch: 3/5; Loss: nan
Epoch: 4/5; Loss: nan
Epoch: 5/5; Loss: nan


#### ***Test the Neural Network***

In [6]:
# Evaluate network1 on the test arrays, which were not passed to train().
network1.test(x_test, y_test)

Accuracy: 53.56%


#### ***Create Neural Network (with 'he' initialisation)***

In [7]:
# Second network: same layer stack as network1, but every DenseLayer is built
# without a weight_initialisation argument, so the default applies.
network2 = NeuralNetwork()
# NOTE(review): this ReLU is added before the first DenseLayer, and no activation is
# added between the 128- and 64-neuron layers — confirm this ordering is intended.
network2.add(ReLU())
network2.add(DenseLayer(no_inputs=28*28, no_neurons=128))       # Default is he initialisation
network2.add(DenseLayer(no_inputs=128, no_neurons=64))
network2.add(ReLU())
network2.add(DenseLayer(no_inputs=64, no_neurons=10))
network2.add(Softmax())

#### ***Train the Neural Network***

In [8]:
# Train on the training arrays; epochs, learning rate and batch size are the same
# values used for network1 so the two runs can be compared.
network2.train(x_train, y_train, epochs=5, learning_rate=0.01, batch_size=32)

Epoch: 1/5; Loss: 0.24487
Epoch: 2/5; Loss: 0.06993
Epoch: 3/5; Loss: 0.309386
Epoch: 4/5; Loss: 0.050972
Epoch: 5/5; Loss: 0.009123


#### ***Test the Neural Network***

In [9]:
# Evaluate network2 on the test arrays, which were not passed to train().
network2.test(x_test, y_test)

Accuracy: 96.25%


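***With the same layer sizes and training hyperparameters, the first network's loss became NaN after the first epoch and it reached only 53.56% test accuracy, whereas the network using the default 'he' initialisation in every layer reached 96.25%. This shows that the initialisation of weights is a crucial factor in training better neural networks.***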