# Exercise Sheet 5, Task 3
In this assignment, we will implement a neural network “library”, using Python and Numpy. The tool is inspired by PyTorch’s implementation.

This week, we will implement Dropout regularisation. For this, we implement a new module ('forward' and 'backward' function) that is initialised with a parameter p, denoting the probability that a neuron's output is dropped. We also need to remember to scale the surviving outputs to make up for the dropped ones.

We modify the implementation of the NeuralNetwork to include dropout with a specified rate (dropout: float = 0.5 in the constructor) after every hidden layer! We can confirm that neurons are dropped at random, since each run of the program will produce different results.

In [79]:

import numpy as np
from typing import List, Tuple

# This is the only fully new part

class Dropout:
    """Inverted dropout: randomly zero elements and rescale the survivors.

    Each element of the input is dropped independently with probability
    ``p``. The elements that survive are divided by the keep probability
    ``1 - p`` so that the expected value of the output equals the input.

    Args:
        p: Probability that an individual element is dropped, in [0, 1].

    Raises:
        ValueError: If ``p`` lies outside [0, 1].
    """

    def __init__(self, p=0.5):
        if not 0.0 <= p <= 1.0:
            raise ValueError(
                f"dropout probability has to be between 0 and 1, but got {p}"
            )
        self.p = p
        # Boolean keep-mask of the most recent forward pass (None until then).
        self.mask = None

    def _rescale(self, values: np.ndarray) -> np.ndarray:
        """Zero the dropped entries of *values* and rescale the kept ones."""
        keep_prob = 1.0 - self.p
        masked = values * self.mask
        if keep_prob == 0.0:
            # Everything is dropped; avoid dividing by zero.
            return masked * 0.0
        return masked / keep_prob

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Sample a new keep-mask and apply it to ``x``.

        Args:
            x: Input array; the mask is drawn with the same shape.

        Returns:
            ``x`` with dropped entries set to zero and kept entries divided
            by ``1 - p``.
        """
        # ``>=`` keeps every element for p == 0 and none for p == 1, because
        # np.random.rand draws from the half-open interval [0, 1).
        self.mask = np.random.rand(*x.shape) >= self.p
        return self._rescale(x)

    def backward(self, grad: np.ndarray = np.array([[1]])) -> np.ndarray:
        """Propagate ``grad`` through the mask of the last forward pass.

        Args:
            grad: Gradient with respect to the output of ``forward``.

        Returns:
            Gradient with respect to the input of ``forward``.

        Raises:
            RuntimeError: If called before any forward pass.
        """
        if self.mask is None:
            raise RuntimeError("Dropout.backward called before Dropout.forward")
        # Same mask and same scale as in the forward pass.
        return self._rescale(grad)

# Once again, we take all of this from previous weeks, only the Neural Network itself will be altered, not the individual layers.
class Sigmoid:
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    def __init__(self):
        """The activation has no parameters and keeps no state."""

    def non_rounded_sigmoid(self,x : np.array) -> np.array:
        """Return the sigmoid of ``x``; same formula as ``forward``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def forward(self, x: np.array) -> np.array:
        """Apply the sigmoid to every element of ``x``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def backward(self, x: np.array, grad: np.array = np.array([[1]])) -> np.array:
        """Chain ``grad`` with the sigmoid derivative evaluated at ``x``.

        Args:
            x: The input that was given to ``forward``.
            grad: Gradient with respect to the output of ``forward``.

        Returns:
            ``grad * s * (1 - s)`` where ``s`` is the sigmoid of ``x``.
        """
        activated = self.forward(x)
        local_derivative = activated * (1 - activated)
        return grad * local_derivative

class MeanSquaredError:
    """Halved squared-error loss, averaged over all elements."""

    def __init__(self):
        """The loss has no parameters and keeps no state."""

    def forward(self, y_pred: np.array, y_true: np.array) -> float:
        """Return the mean of ``0.5 * (y_true - y_pred) ** 2``."""
        residual = y_true - y_pred
        halved_squares = 0.5 * residual ** 2
        return np.mean(halved_squares)

    def backward(self, y_pred: np.array, y_true: np.array, grad: np.array = np.array([[1]])) -> np.array:
        """Return ``grad * (y_pred - y_true)``.

        Note that the result is not divided by the number of elements, even
        though ``forward`` takes a mean.
        """
        error = y_pred - y_true
        return grad * error

class FullyConnectedLayer:
    """Affine layer computing ``x @ weights + bias``.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
    """

    def __init__(self, input_size: int, output_size: int):
        self.input_size = input_size
        self.output_size = output_size

        # Weights are drawn from a standard normal distribution, bias starts at zero.
        self.weights = np.random.randn(self.input_size, self.output_size)
        self.bias = np.zeros((1, self.output_size))

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Return ``x @ weights + bias``; the bias row broadcasts over the rows of ``x``."""
        return np.matmul(x, self.weights) + self.bias

    def backward(self, x: np.ndarray, grad: np.ndarray = np.array([[1]])) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Compute the gradients with respect to input, weights and bias.

        Args:
            x: The 2-D input that was given to ``forward``.
            grad: 2-D gradient with respect to the output of ``forward``.

        Returns:
            A tuple ``(x_grad, W_grad, b_grad)``. ``W_grad`` has the shape of
            ``weights`` and ``b_grad`` has the shape of ``bias``.
        """
        x_grad = np.matmul(grad, self.weights.T)
        W_grad = np.matmul(x.T, grad)
        # The bias is shared by every row of the batch, so its gradient is the
        # sum over the batch axis. This keeps the (1, output_size) bias shape and
        # matches W_grad, which already sums over the batch. For a single-row
        # grad the values are unchanged.
        b_grad = np.sum(grad, axis=0, keepdims=True)

        return x_grad, W_grad, b_grad

class NeuralNetwork:
    """Stack of fully connected layers with a shared activation and dropout.

    Every layer except the last is followed by the activation and then by a
    dropout module. The last layer is purely linear.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        hidden_sizes: Width of each hidden layer, in order.
        activation: Class of the activation; it is instantiated once and the
            instance is shared by all hidden layers.
        dropout: Drop rate handed to every ``Dropout`` module.
    """

    def __init__(self,
                 input_size: int,
                 output_size: int,
                 hidden_sizes: List[int],
                 activation=Sigmoid,
                 dropout:float =0.5 ):
        # Caches that forward() fills and backward() reads.
        self.activ_inputs = None
        self.layer_inputs = None
        sizes = [input_size] + hidden_sizes + [output_size]
        self.layers = [
            FullyConnectedLayer(n_in, n_out)
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]
        # One dropout module per hidden layer.
        self.dropouts = [Dropout(dropout) for _ in range(len(sizes) - 2)]
        self.activation = activation()

    def forward(self, x: np.array) -> np.array:
        """Run ``x`` through the network and cache what backward() needs.

        ``layer_inputs[i]`` holds the input of layer ``i`` and
        ``activ_inputs[i]`` holds the pre-activation output of hidden layer ``i``.
        """
        self.layer_inputs = []
        self.activ_inputs = []

        signal = x
        for fc, drop in zip(self.layers[:-1], self.dropouts):
            self.layer_inputs.append(signal)
            pre_activation = fc.forward(signal)
            self.activ_inputs.append(pre_activation)
            # Activation first, then the dropout layer.
            signal = drop.forward(self.activation.forward(pre_activation))

        # The last layer does not use an activation function or dropout.
        self.layer_inputs.append(signal)
        return self.layers[-1].forward(signal)

    def backward(self, x: np.array, grad: np.array = np.array([[1]])) -> Tuple[np.array]:
        """Back-propagate ``grad`` using the caches of the last forward pass.

        Args:
            x: Not used by this method; the cached layer inputs are used instead.
            grad: Gradient with respect to the network output.

        Returns:
            A tuple ``(grad, W_grads, b_grads)``: the gradient with respect to
            the network input, and the per-layer weight and bias gradients
            as lists ordered from the first layer to the last.
        """
        # Output layer first: it has neither activation nor dropout.
        grad, W_grad, b_grad = self.layers[-1].backward(self.layer_inputs[-1], grad)
        W_grads = [W_grad]
        b_grads = [b_grad]

        # Hidden layers from back to front, undoing dropout -> activation -> layer.
        for idx in range(len(self.activ_inputs) - 1, -1, -1):
            grad = self.dropouts[idx].backward(grad)
            grad = self.activation.backward(self.activ_inputs[idx], grad)
            grad, W_grad, b_grad = self.layers[idx].backward(self.layer_inputs[idx], grad)
            W_grads.append(W_grad)
            b_grads.append(b_grad)

        # Gradients were collected last-to-first; return them first-to-last.
        return grad, W_grads[::-1], b_grads[::-1]

## Testing the Implementation

Running this cell several times should now yield different results, as the dropout layer will randomly drop neurons. Don't worry if everything becomes 0, as we are using a very small network, and the dropout layer might drop all neurons.

In [80]:

# Loss
loss_function = MeanSquaredError()

# A single training sample: input x and target y
x = np.array([[1, 1]])
y = np.array([[0]])

# Network: 2 inputs -> one hidden layer with 2 units -> 1 output
net = NeuralNetwork(2, 1, [2], Sigmoid)

# Replace the random initial weights with fixed values, so that dropout
# is the only remaining source of randomness between runs
net.layers[0].weights = np.array([
    [0.5, 0.75],
    [0.25, 0.25],
])
net.layers[1].weights = np.array([
    [0.5],
    [0.5],
])

# Forward pass, followed by the loss for this prediction
pred = net.forward(x)
loss = loss_function.forward(pred, y)

print(f"Prediction: {pred}")
print(f"Loss: {loss}")

# Backward pass: gradient of the loss first, then through the network
grad = loss_function.backward(pred, y)
grad, W_grads, b_grads = net.backward(x, grad)

print(f"Gradients of the first layer: \n\nW1:\n{W_grads[0]}, \n\nb1: \n{b_grads[0]}\n")
print(f"Gradients of the second layer: \n\nW2:\n{W_grads[1]}, \n\nb2 \n{b_grads[1]}")

Prediction: [[0.6791787]]
Loss: 0.23064185270678947
Gradients of the first layer: 

W1:
[[0.14798964 0.        ]
 [0.14798964 0.        ]], 

b1: 
[[0.14798964 0.        ]]

Gradients of the second layer: 

W2:
[[0.92256741]
 [0.        ]], 

b2 
[[0.6791787]]
