This function will take a list of dictionaries and convert that structure into an initial neural network with He weight initialization.

In [3]:
import numpy as np

def init_layers(nn_architecture, seed = 42):
    np.random.seed(seed)
    params_values = {}

    # Take the list of dictionaries and construct a NN with randomly assigned weights and bias vectors.
    # Layers are given small random numbers as their inital state so as not to encounter the breaking symmetry problem if all given the same values and it provides a good enough starting point.
    # Using small values increases the efficiency of the algorithm during the first iterations.
    for i in range(1, len(nn_architecture)):
        layer_input_size = nn_architecture[i-1]["input_nodes"]

        if (i == len(nn_architecture)-1):
            layer_output_size = 10
        else:
            layer_output_size = nn_architecture[i]["input_nodes"]
        
        # Initialize layer node's weights using He Weight Initialization. This is suitable as we are using ReLU activation functions.
        # Source: https://datascience-enthusiast.com/DL/Improving-DeepNeural-Networks-Initialization.html
        # This stage is important as incorrect weight initialization can lead to vanishing/exploding gradients.
        params_values['W' + str(i)] = np.random.randn(
            layer_output_size, layer_input_size) * np.sqrt(2.0 / layer_input_size)
        
        
    return params_values

In [4]:
# Setup the initialization array which outlines the architecture of the NN and will be passed into the program to generate the appropraite NN.
nn_architecture = [
    {"input_nodes": 784, "activation": "relu"},
    {"input_nodes": 128, "activation": "relu"},
    {"input_nodes": 10, "activation": "softmax"},
]

import pandas as pd

df_test = pd.read_csv('./Dataset/fashion-mnist_test.csv')
df_train = pd.read_csv('./Dataset/fashion-mnist_train.csv')

y_train = np.array(df_train['label'].to_numpy())
df_train = df_train.drop('label', 1)

# Normalziing the pixel data
X_train = np.array((df_train.to_numpy() / 255).astype('float32'))

y_test = np.array(df_test['label'].to_numpy())
df_test = df_test.drop('label', 1)

# Normalziing the pixel data
X_test = np.array((df_test.to_numpy() / 255).astype('float32'))

# One Hot Encoding the labels
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Create neural network based on specified architecture with initial weights.
params_values = init_layers(nn_architecture, 42)
print(params_values)

  df_train = df_train.drop('label', 1)
  df_test = df_test.drop('label', 1)
