This is a simple neural network with one hidden layer and an output layer.
The notebook is intended to serve as a skeleton for much bigger networks to be built from.

In [None]:
# Importing Essential Libraries
import numpy as np            # For Numerical Computations
import matplotlib.pyplot as plt     # For Visualisation
import pandas as pd            # For Data Plotting

Activation Functions and their Derivatives Definition

In [None]:
# Sigmoid/Logistic Activation Function Definition

def sigmoid(Z):
    """Apply the logistic function 1 / (1 + e^(-Z)) element-wise.

    Z may be a scalar or a NumPy array; the result has the same shape as Z
    and every value lies between 0 and 1.
    """
    decay = np.exp(-Z)
    return 1 / (1 + decay)

# Sigmoid/Logistic Function Derivative Definition (despite the `_inv` suffix, this is the derivative, not the inverse)

def sigmoid_inv(Z):
    """Return the derivative of the sigmoid evaluated at Z.

    Computed as sigmoid(Z) * (1 - sigmoid(Z)). Despite the ``_inv`` suffix
    this is the derivative, not the inverse function.
    """
    activation = sigmoid(Z)
    return activation * (1 - activation)

In [None]:
# Rectified Linear Unit Activation Function Definition

def relu(Z):
    """Apply the rectified linear unit element-wise: max(0, Z)."""
    floor = 0  # negative inputs are replaced by this value
    return np.maximum(floor, Z)

# Rectified Linear Unit Function Derivative Definition (despite the `_inv` suffix, this is the derivative, not the inverse)

def relu_inv(Z):
    """Return the derivative of ReLU at Z as a boolean mask.

    The mask is True where Z is strictly positive (slope 1) and False
    elsewhere (slope 0). Despite the ``_inv`` suffix this is the derivative,
    not the inverse function.
    """
    is_positive = 0 < Z
    return is_positive

In [None]:
# Tanh Activation Function Definition

def tanh(Z):
    """Apply the hyperbolic tangent element-wise (thin wrapper over np.tanh)."""
    squashed = np.tanh(Z)
    return squashed

# Tanh Function Derivative Definition (despite the `_inv` suffix, this is the derivative, not the inverse)

def tanh_inv(Z):
    """Return the derivative of tanh evaluated at Z: 1 - tanh(Z)^2.

    Despite the ``_inv`` suffix this is the derivative, not the inverse
    function.
    """
    activation = np.tanh(Z)
    squared = np.power(activation, 2)
    return 1. - squared

The following code defines the blueprint of the neural network.
To keep this simple, the input layer will have n_x (dimension of input matrix X) units, the hidden layer will have n_h (variable) units, and the output layer will have n_y (dimension of output matrix Y) units.

In [None]:
def define_layers(X, Y, n_h=16):
    """Derive the network architecture from the training data.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Feature matrix; its first dimension is used as the input layer size.
    Y : array-like with a ``shape`` attribute
        Label matrix; its first dimension is used as the output layer size.
    n_h : int, optional
        Number of hidden layer units. Defaults to 16, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(n_x, n_h, n_y)``: the input, hidden and output layer sizes.
    """
    n_x = X.shape[0]
    n_y = Y.shape[0]

    return n_x, n_h, n_y      # Basically returns the network architecture

The next code block initialises the network's weights and biases: the weights use random initialisation scaled by 0.01, and the biases start at zero.

In [None]:
def init_parameters(n_x, n_h, n_y): # Takes in details of the network architecture
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01
    so that they are small and centred on zero. The previous uniform
    ``np.random.rand`` draw produced only positive weights. Biases start at
    zero.

    Parameters
    ----------
    n_x : int
        Number of input units.
    n_h : int
        Number of hidden units.
    n_y : int
        Number of output units.

    Returns
    -------
    dict
        ``'W1'`` of shape (n_h, n_x), ``'b1'`` of shape (n_h, 1),
        ``'W2'`` of shape (n_y, n_h) and ``'b2'`` of shape (n_y, 1).
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
        }

    return parameters # Returns a dictionary of the Weights and Biases

The Feed Forward Step

In [None]:
def forward_propagate(parameters, X):
    """Run one forward pass through the hidden and output layers.

    Reads 'W1', 'b1', 'W2' and 'b2' from ``parameters``, applies a linear step
    followed by ReLU for the hidden layer, then a linear step followed by
    sigmoid for the output layer.

    Returns a dictionary holding the linear outputs ('Z1', 'Z2') and the
    activations ('A1', 'A2') of both layers; 'A2' is the network's output.
    """
    # Hidden layer: linear combination, then ReLU for non-linearity
    hidden_linear = np.dot(parameters['W1'], X) + parameters['b1']
    hidden_activation = relu(hidden_linear)

    # Output layer: linear combination, then sigmoid for non-linearity
    output_linear = np.dot(parameters['W2'], hidden_activation) + parameters['b2']
    output_activation = sigmoid(output_linear)

    # Intermediate values are kept so they can be reused later
    return {
        'Z1': hidden_linear,
        'A1': hidden_activation,
        'Z2': output_linear,
        'A2': output_activation,
    }

The Feed Backward Step

In [None]:
def backward_propagate(cache, parameters, X, Y):
    """Compute the gradients of the loss with respect to all weights and biases.

    Parameters
    ----------
    cache : dict
        Forward-pass values; 'Z1', 'A1' and 'A2' are read.
    parameters : dict
        Current weights and biases; only 'W2' is read.
    X : ndarray
        Input features, with one column per training example.
    Y : ndarray
        Labels; ``Y.shape[1]`` is taken as the number of training examples.

    Returns
    -------
    dict
        Gradients 'dW1', 'db1', 'dW2' and 'db2'. The bias gradients are
        column vectors with one entry per unit of their layer.
    """
    m = Y.shape[1] # Getting the number of training examples
    W2 = parameters['W2']

    Z1 = cache['Z1']
    A1 = cache['A1']
    A2 = cache['A2']

    # Derivative of x (dx) is how much the loss changes with a change in the value of x
    dZ2 = A2 - Y                     # Derivative of Z2
    dW2 = dZ2.dot(A1.T) / m          # Derivative of W2
    # Sum across examples only (axis=1) so each unit keeps its own bias
    # gradient; an axis-less sum would merge all units into a single scalar.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m   # Derivative of b2
    dA1 = W2.T.dot(dZ2)              # Derivative of A1
    dZ1 = dA1 * relu_inv(Z1)         # Derivative of Z1
    dW1 = dZ1.dot(X.T) / m           # Derivative of W1
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m   # Derivative of b1

    grads = {
        'dW1': dW1,
        'db1': db1,
        'dW2': dW2,
        'db2': db2,
        }

    return grads # Returns the gradients in a dictionary called grads

The Loss/Error function: specifically, the logistic (binary cross-entropy) loss, which compares the predicted values to the actual values. A neural network is meant to learn a set of weights and biases that reduce the value of this loss.

In [None]:
def compute_loss(y_hat, y):
    """Return the mean logistic (binary cross-entropy) loss.

    Parameters
    ----------
    y_hat : ndarray
        Predicted probabilities (the network output 'A2').
    y : ndarray
        True labels; ``y.shape[1]`` is taken as the number of examples.

    Returns
    -------
    float
        ``-sum(y * log(y_hat) + (1 - y) * log(1 - y_hat)) / m``.
    """
    m = y.shape[1]

    # Keep predictions strictly inside (0, 1): a saturated sigmoid can output
    # exactly 0 or 1, and log(0) would turn the cost into nan or inf.
    eps = 1e-15
    y_hat = np.clip(y_hat, eps, 1 - eps)

    log_likelihood = np.multiply(y, np.log(y_hat)) + np.multiply((1 - y), np.log(1 - y_hat)) # Logistic loss formula
    cost = -np.sum(log_likelihood) / m

    return cost

Gradient Descent. This module is the algorithm that actually updates the value of the parameters according to what reduces the cost or loss best

In [None]:
def update_parameters(parameters, grads, learning_rate = 0.05): # learning rate regulates the speed of learning
    """Take a single gradient descent step.

    Each of 'W1', 'b1', 'W2' and 'b2' is moved against its gradient
    ('dW1', 'db1', 'dW2', 'db2' in ``grads``), scaled by ``learning_rate``.

    Returns a new dictionary with the updated values; the dictionary passed
    in is not modified.
    """
    updated = {}
    for name in ('W1', 'b1', 'W2', 'b2'):
        # The gradient is the slope of the loss, so stepping against it reduces the cost
        step = learning_rate * grads['d' + name]
        updated[name] = parameters[name] - step

    return updated

Collation of the functions to make a complete neural network model

In [None]:
def neural_network_model(X, Y, num_iterations = 1000, learning_rate = 0.05):
    """Train the network with gradient descent and plot the cost curve.

    Parameters
    ----------
    X : ndarray
        Training features.
    Y : ndarray
        Training labels.
    num_iterations : int, optional
        Number of gradient descent steps to take.
    learning_rate : float, optional
        Step size passed to ``update_parameters``. Defaults to 0.05, which is
        also the default of ``update_parameters``.

    Returns
    -------
    dict
        The weights and biases after the final gradient descent step.
    """
    n_x, n_h, n_y = define_layers(X, Y)

    parameters = init_parameters(n_x, n_h, n_y)

    cost_iterations = []  # iteration number at which each cost was recorded
    costs = []

    for i in range(num_iterations):

        cache = forward_propagate(parameters, X)

        cost = compute_loss(cache['A2'], Y) # compute the cost after each step

        grads = backward_propagate(cache, parameters, X, Y) # compute gradients

        # gradient descent step, adjusting weights to be better
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        if i % 10 == 0:
            # Every 10 steps, save the cost and its iteration number for visualization
            cost_iterations.append(i)
            costs.append(cost)
        if i % 1000 == 0:
            # Every 1000 steps, print the cost to check that it is reducing (that the model is learning)
            print('Cost after', i, 'iterations:', str(cost))

    # Plot against the recorded iteration numbers so the x-axis matches its label
    plt.plot(cost_iterations, costs)
    plt.xlabel('Number of Iterations')
    plt.ylabel('Cost')
    plt.show()  # must be called; a bare `plt.show` does nothing

    return parameters # returns the final parameters that the network learns

Module to make predictions using the learned parameters

In [None]:
def predict(resulting_parameters, X_test):
    """Predict binary labels for X_test using the learned parameters.

    Runs a forward pass and thresholds the output activation 'A2' at 0.5:
    values strictly greater than 0.5 become 1, everything else becomes 0.
    """
    # Forward propagation on the new data; 'A2' holds the network's output
    probabilities = forward_propagate(resulting_parameters, X_test)['A2']

    # Threshold at 0.5, then turn the boolean mask into integers
    return (probabilities > 0.5).astype(int)

Running the next code block runs the entire model
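This is where you replace the X_train and Y_train variables with your training data. X_train must have shape (n_x, m) and Y_train shape (n_y, m), where m is the number of training examples (one example per column).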

The actual training of your model happens here

In [None]:
# Train the network on the training data and keep the learned weights and biases
resulting_parameters = neural_network_model(X = X_train, Y = Y_train)

Calculate the accuracy of your model on the training set

In [None]:
# Predict on the training data and measure the fraction of labels that match
predictions_on_train_set = predict(resulting_parameters, X_train)
_matches = (predictions_on_train_set == Y_train).astype(int)  # 1 where the prediction is correct
accuracy = np.sum(_matches) / Y_train.size
print("Accuracy: ", str(accuracy * 100), "%")