# Numpy deep neural network

### Imports

In [49]:
# set the file name (required)
__file__ = 'Numpy deep neural network.ipynb'

import numpy as np
from pprint import pprint
from IPython.display import Image

import ipytest.magics
import pytest

### Network architecture

![Network architecture](./supporting_visualizations/nn_architecture.png)

### Settings

In [59]:
# One entry per layer, ordered from the input side to the output side.
# Each layer's "input_dim" equals the previous layer's "output_dim".
NN_ARCHITECTURE = [
    dict(input_dim=2, output_dim=4, activation="relu"),
    dict(input_dim=4, output_dim=6, activation="relu"),
    dict(input_dim=6, output_dim=6, activation="relu"),
    dict(input_dim=6, output_dim=4, activation="relu"),
    dict(input_dim=4, output_dim=1, activation="sigmoid"),
]

### Activation functions

![Activations](./supporting_visualizations/activations.png)

In [80]:
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)).

    Evaluated in a numerically stable form: the exponential is only ever
    taken of a non-positive number, so large-magnitude inputs no longer
    trigger NumPy's overflow RuntimeWarning.

    Args:
        Z: scalar or array-like of pre-activation values.

    Returns:
        Values in [0, 1] with the same shape as Z (a scalar for scalar input).
    """
    Z = np.asarray(Z)
    # exp(-|Z|) lies in [0, 1] for every finite or infinite Z
    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + e^-Z);  Z < 0: e^Z / (1 + e^Z) -- algebraically the same value
    result = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # indexing with () unwraps a 0-d result to a scalar and leaves n-d arrays untouched
    return result[()]

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    floor = 0
    return np.maximum(floor, Z)

def sigmoid_backward(dA, Z):
    """Push the gradient dA back through a sigmoid activation.

    Uses the sigmoid derivative s * (1 - s), where s = sigmoid(Z), and
    returns dA scaled element-wise by that derivative.
    """
    activation = sigmoid(Z)
    scaled = dA * activation
    return scaled * (1 - activation)

def relu_backward(dA, Z):
    """Push the gradient dA back through a ReLU activation.

    Returns a copy of dA in which every position where Z <= 0 is zeroed;
    dA itself is left unmodified.
    """
    grad = np.copy(dA)
    inactive = Z <= 0
    grad[inactive] = 0
    return grad

### Initialization of parameter values for each layer

In [84]:
def initialize_parameters(nn_architecture, seed = 99):
    """Create the initial weights and biases for every layer of the network.

    Args:
        nn_architecture: iterable of layer descriptions, each a mapping with
            "input_dim" and "output_dim" entries. Any iterable works; it is
            traversed exactly once.
        seed: value passed to np.random.seed before drawing the weights, so
            equal seeds give equal parameters. Note that this reseeds NumPy's
            global random generator.

    Returns:
        dict mapping 'W<i>' to an (output_dim, input_dim) array of normal
        draws scaled by 0.01, and 'b<i>' to an (output_dim, 1) array of
        zeros, with layers numbered from 1.
    """
    # seed the global generator so the draws below are reproducible
    np.random.seed(seed)
    # parameter storage
    params_values = {}

    # iterate over the layers, numbering them from 1
    for layer_idx, layer in enumerate(nn_architecture, start=1):
        # number of units feeding into / produced by this layer
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        # weights: standard-normal draws scaled by 0.01; biases: zeros
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.01
        params_values['b' + str(layer_idx)] = np.zeros(
            (layer_output_size, 1))

    return params_values

![Parameters sizes](./supporting_visualizations/params_sizes.png)

In [62]:
%%run_pytest[clean] -qq
# TEST PARAMETERS SHAPES
# Each test function needs a unique name: a repeated `def` rebinds the name,
# so the earlier test is silently dropped and never collected.

params_values = initialize_parameters(NN_ARCHITECTURE)

def test_first_layer_W_shape():
    assert params_values["W1"].shape == (NN_ARCHITECTURE[0]["output_dim"], NN_ARCHITECTURE[0]["input_dim"])
def test_first_layer_b_shape():
    assert params_values["b1"].shape == (NN_ARCHITECTURE[0]["output_dim"], 1)
def test_second_layer_W_shape():
    assert params_values["W2"].shape == (NN_ARCHITECTURE[1]["output_dim"], NN_ARCHITECTURE[1]["input_dim"])
def test_second_layer_b_shape():
    assert params_values["b2"].shape == (NN_ARCHITECTURE[1]["output_dim"], 1)

..                                                                       [100%]


### Single layer forward propagation step

![Single unit](./supporting_visualizations/single_unit.png)

In [65]:
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Run one layer: affine transform followed by the chosen activation.

    Args:
        A_prev: activations coming from the previous layer; must be
            compatible with np.dot(W_curr, A_prev).
        W_curr: weight matrix of the current layer.
        b_curr: bias of the current layer, added to the matrix product.
        activation: name of the activation, "relu" or "sigmoid".

    Returns:
        Tuple (A_curr, Z_curr): the activated output and the pre-activation
        value np.dot(W_curr, A_prev) + b_curr.

    Raises:
        Exception: if `activation` is neither "relu" nor "sigmoid".
    """
    # calculation of the input value for the activation function
    Z_curr = np.dot(W_curr, A_prev) + b_curr

    # selection of activation function
    # Strings are compared by value (==). Identity (`is`) only matches when
    # both strings happen to be the same interned object, so a name read from
    # a file or built at runtime would be rejected.
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')

    return activation_func(Z_curr), Z_curr

![Matrix sizes](./supporting_visualizations/matrix_sizes.png)

![Matrix sizes 2](./supporting_visualizations/matrix_sizes_2.png)

In [79]:
%%run_pytest[clean] -qq
# TEST OUTPUT FOR SINGLE LAYER FORWARD STEP

# Fixed seed: the expected numbers in the tests below are only valid for
# exactly this sequence of random draws (A_prev, then W_curr, then b_curr).
np.random.seed(2)
A_prev = np.random.randn(3,2)
W_curr = np.random.randn(1,3)
b_curr = np.random.randn(1,1)

A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu")

def test_relu_Z_shape():
    """A (1, 3) weight matrix applied to (3, 2) inputs gives a (1, 2) Z."""
    assert (1,2) == Z_curr.shape

def test_relu_A_shape():
    """The activation keeps the shape of Z."""
    assert (1,2) == A_curr.shape

def test_relu_Z_value():
    """Z matches the reference values for seed 2."""
    expected_Z = np.array([[ 3.43896131, -2.08938436]])
    assert np.allclose(Z_curr, expected_Z)

def test_relu_A_value():
    """ReLU zeroes the negative entry of Z and keeps the positive one."""
    expected_A = np.array([[3.43896131, 0.        ]])
    assert np.allclose(A_curr, expected_A)

....                                                                     [100%]


### Full forward propagation

In [129]:
def full_forward_propagation(X, params_values, nn_architecture):
    """Run the input through every layer of the network in order.

    Args:
        X: network input, used as the activation of layer 0.
        params_values: mapping holding "W<i>" and "b<i>" for each layer,
            with layers numbered from 1.
        nn_architecture: sequence of layer descriptions; only each layer's
            "activation" entry is read here.

    Returns:
        Tuple (A_last, memory): the output of the final layer and a dict
        storing "A<i-1>" (the input to layer i) and "Z<i>" (its
        pre-activation value) for every layer i.
    """
    # values cached for the backward step
    cache = {}
    # the input plays the role of the layer-0 activation
    activation = X

    for layer_number, layer_spec in enumerate(nn_architecture, start=1):
        # what the previous layer produced is what this layer consumes
        layer_input = activation
        suffix = str(layer_number)

        # look up this layer's activation name, weights and bias
        activation_name = layer_spec["activation"]
        weights = params_values["W" + suffix]
        bias = params_values["b" + suffix]

        activation, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, activation_name)

        # inputs are keyed by the previous layer's number, Z by the current one
        cache["A" + str(layer_number - 1)] = layer_input
        cache["Z" + suffix] = pre_activation

    return activation, cache

In [130]:
# Hand-built toy inputs and parameters for a three-layer forward pass.
# The seed and the order of the draws are fixed so the values are reproducible.
np.random.seed(6)
X = np.random.randn(5, 4)

# each layer draws its weights first, then its bias
W1, b1 = np.random.randn(4, 5), np.random.randn(4, 1)
W2, b2 = np.random.randn(3, 4), np.random.randn(3, 1)
W3, b3 = np.random.randn(1, 3), np.random.randn(1, 1)

params_values = dict(W1=W1, b1=b1, W2=W2, b2=b2, W3=W3, b3=b3)

In [131]:
# Layer sizes mirror the hand-built parameters: X is (5, 4) and W1 is (4, 5),
# so the first layer takes 5 inputs and produces 4 outputs.
# Each "input_dim" equals the previous layer's "output_dim".
nn_architecture = [
    {"input_dim": 5, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 3, "activation": "relu"},
    {"input_dim": 3, "output_dim": 1, "activation": "sigmoid"},
]

In [132]:
# Forward pass over the toy inputs: A is the final layer's output and
# memory holds the cached per-layer A/Z values.
A, memory = full_forward_propagation(
    X=X,
    params_values=params_values,
    nn_architecture=nn_architecture,
)

In [134]:
# Pretty-print the values cached by the forward pass (keys "A<i>" and "Z<i>").
pprint(memory)

{'A0': array([[-0.31178367,  0.72900392,  0.21782079, -0.8990918 ],
       [-2.48678065,  0.91325152,  1.12706373, -1.51409323],
       [ 1.63929108, -0.4298936 ,  2.63128056,  0.60182225],
       [-0.33588161,  1.23773784,  0.11112817,  0.12915125],
       [ 0.07612761, -0.15512816,  0.63422534,  0.810655  ]]),
 'A1': array([[0.        , 3.18040136, 0.4074501 , 0.        ],
       [0.        , 0.        , 3.18141623, 0.        ],
       [4.18500916, 0.        , 0.        , 2.72141638],
       [5.05850802, 0.        , 0.        , 3.82321852]]),
 'A2': array([[ 2.2644603 ,  1.09971298,  0.        ,  1.54036335],
       [ 6.33722569,  0.        ,  0.        ,  4.48582383],
       [10.37508342,  0.        ,  1.63635185,  8.17870169]]),
 'Z1': array([[-5.23825714,  3.18040136,  0.4074501 , -1.88612721],
       [-2.77358234, -0.56177316,  3.18141623, -0.99209432],
       [ 4.18500916, -1.78006909, -0.14502619,  2.72141638],
       [ 5.05850802, -1.25674082, -3.54566654,  3.82321852]]),
 'Z2