This function will take a list of dictionaries and convert that structure into an initial neural network with He weight initialization.

In [1]:
import numpy as np

def init_layers(nn_architecture, seed = 42):
    """Build the initial weight matrices for a fully connected network.

    Each consecutive pair of entries in ``nn_architecture`` yields one weight
    matrix ``W<i>`` (numbered from 1) of shape ``(output_size, input_size)``.
    Only weights are created here; no bias vectors are produced.

    Args:
        nn_architecture: Sequence of dicts. The ``"input_nodes"`` value of
            entry ``i - 1`` is the input width of layer ``i``; the
            ``"input_nodes"`` value of entry ``i`` is its output width,
            except for the final layer (see note below).
        seed: Value passed to ``np.random.seed``. This reseeds NumPy's
            global random state as a side effect.

    Returns:
        Dict mapping ``'W1'``, ``'W2'``, ... to the initial weight arrays.
        Empty when the architecture has fewer than two entries.

    Note:
        The final layer's output width is always 10, regardless of what the
        last architecture entry declares.
    """
    np.random.seed(seed)
    final_layer = len(nn_architecture) - 1
    weights = {}

    # Walk consecutive (previous, current) entries; the draw order matters
    # because every layer consumes values from the same seeded stream.
    layer_pairs = zip(nn_architecture, nn_architecture[1:])
    for layer_no, (previous, current) in enumerate(layer_pairs, start=1):
        fan_in = previous["input_nodes"]
        fan_out = 10 if layer_no == final_layer else current["input_nodes"]

        # He initialization: standard-normal draws scaled by sqrt(2 / fan_in).
        # Random, appropriately scaled weights break the symmetry between
        # nodes and help avoid vanishing/exploding gradients with ReLU.
        # Source: https://datascience-enthusiast.com/DL/Improving-DeepNeural-Networks-Initialization.html
        he_scale = np.sqrt(2.0 / fan_in)
        weights['W' + str(layer_no)] = np.random.randn(fan_out, fan_in) * he_scale

    return weights

Sigmoid, ReLU, and Softmax activation functions for both forward propagation and backward (derivative) propagation.

In [2]:
# Activation Function - Sigmoid - Forward Propagation
def sigmoid(Z):
    """Return the logistic sigmoid 1 / (1 + exp(-Z)), computed elementwise."""
    denominator = 1.0 + np.exp(-Z)
    return 1.0 / denominator

# Activation Function - ReLU - Forward Propagation
def relu(Z):
    """Return the elementwise maximum of 0 and Z."""
    floor = 0
    return np.maximum(floor, Z)

# Activation Function - Softmax - Forward Propagation
def softmax(Z):
    """Return the softmax of Z, normalized along axis 0.

    Each slice along axis 0 is shifted by its own maximum before
    exponentiation. Softmax is invariant to a constant shift, so the result
    is unchanged mathematically, but the largest exponent in every slice
    becomes exp(0) = 1. This prevents overflow and also guarantees that the
    normalizing sum is at least 1, so a column whose values are all far
    below the global maximum no longer underflows to 0/0 = NaN.

    Args:
        Z: Array of pre-activation values; normalization runs over axis 0.

    Returns:
        Array with the same shape as Z whose entries sum to 1 along axis 0.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=0, keepdims=True)


# Activation Function - Softmax - Backward Propagation
def softmax_backward(Z):
    """Return s * (1 - s) elementwise, where s = softmax(Z).

    This is the same elementwise form used for the sigmoid derivative; it
    does not include cross terms between different softmax outputs.
    """
    probabilities = softmax(Z)
    complement = 1 - probabilities
    return probabilities * complement

# Activation Function - Sigmoid - Backward Propagation
def sigmoid_backward(Z):
    """Return the sigmoid derivative s * (1 - s) elementwise, where s = sigmoid(Z)."""
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement

# Activation Function - ReLU - Backward Propagation
def relu_backward(Z):
    """Return the ReLU derivative of Z: 1 where Z > 0, 0 where Z <= 0.

    The result is a fresh copy with Z's dtype; Z itself is not modified.
    Entries matching neither comparison (e.g. NaN) keep their copied value.
    """
    gradient = np.array(Z, copy = True)
    is_positive = Z > 0
    is_non_positive = Z <= 0
    # The two masks are disjoint, so assignment order does not matter.
    gradient[is_positive] = 1
    gradient[is_non_positive] = 0
    return gradient

In [4]:
# Set up the architecture description that is passed to init_layers to
# generate the appropriate network. Each entry gives a layer's node count
# ("input_nodes") and the activation associated with it.
nn_architecture = [
    {"input_nodes": 784, "activation": "relu"},
    {"input_nodes": 128, "activation": "relu"},
    {"input_nodes": 10, "activation": "softmax"},
]

import pandas as pd

df_test = pd.read_csv('fashion-mnist_test.csv')
df_train = pd.read_csv('fashion-mnist_train.csv')

# Split the training frame into integer labels and pixel columns.
# `axis` is passed by keyword: current pandas rejects a positional axis
# argument to DataFrame.drop.
y_train = df_train['label'].to_numpy()
df_train = df_train.drop('label', axis=1)

# Normalize the pixel data by dividing by 255, stored as float32.
X_train = (df_train.to_numpy() / 255).astype('float32')

# Same split for the test frame.
y_test = df_test['label'].to_numpy()
df_test = df_test.drop('label', axis=1)

# Normalize the pixel data by dividing by 255, stored as float32.
X_test = (df_test.to_numpy() / 255).astype('float32')

# One-hot encode the labels: indexing the 10x10 identity matrix with the
# label values selects one row per sample.
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Create the neural network's initial weights from the specified architecture.
params_values = init_layers(nn_architecture, 42)
print(params_values)

{'W1': array([[ 0.02508785, -0.0069834 ,  0.03271321, ..., -0.06744508,
         0.01920289,  0.03083924],
       [ 0.02827369,  0.05458767,  0.04211943, ..., -0.01420919,
         0.00338354,  0.02605887],
       [-0.07892048, -0.0267212 ,  0.04011642, ..., -0.01979963,
         0.05353487,  0.03116351],
       ...,
       [-0.01520984, -0.02996139,  0.07425277, ...,  0.08622519,
        -0.0071112 ,  0.04780936],
       [ 0.04152834,  0.03256375, -0.01150627, ..., -0.08783793,
        -0.01636675, -0.0307535 ],
       [ 0.08485768, -0.00966599, -0.03247982, ...,  0.06209712,
         0.08136642,  0.07013439]]), 'W2': array([[-0.01002943,  0.095423  ,  0.13284665, ...,  0.13962699,
         0.12200584,  0.08994849],
       [-0.04016608, -0.00100266, -0.15665874, ..., -0.08438804,
        -0.06913039, -0.23780935],
       [-0.01403963, -0.08706934, -0.08593643, ..., -0.07263895,
        -0.04598236,  0.10144271],
       ...,
       [-0.15393538,  0.04641696, -0.28507112, ...,  0.000537