# Neural Network From Scratch

In [63]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the seed of NumPy's global random generator so that the random draws
# made later in this notebook (e.g. np.random.randn in initialize_parameters)
# are reproducible from run to run.
np.random.seed(1)

In [64]:
def initialize_parameters(size_x, sizes_h, size_y):
    """ Initializes weights and bias units for nn.

    Parameters
    ----------
    size_x: int
        Size of input layer, X
    sizes_h: int, list or tuple
        Size(s) of hidden layer. A single int describes one hidden layer;
        a list or tuple gives one size per hidden layer, in order. NumPy
        integer scalars are accepted wherever a plain int is.
    size_y: int
        Size of output layer, Y

    Returns
    -------
    dict:
        dictionary of parameters with keys W1, W2,... Wn and b1, b2,... bn.
        ``Wi`` has shape (size of layer i, size of layer i-1) and holds
        standard-normal samples scaled by 0.01; ``bi`` is a 1-D array of
        zeros with one entry per unit of layer i.

    Raises
    ------
    TypeError
        If sizes_h is neither an int nor a list/tuple.
    """
    # isinstance (rather than an exact type() comparison) also accepts tuples,
    # list subclasses and NumPy integer scalars. bool is a subclass of int but
    # is not a meaningful layer size, so it is still rejected.
    if isinstance(sizes_h, (list, tuple)):
        hidden_sizes = list(sizes_h)
    elif isinstance(sizes_h, (int, np.integer)) and not isinstance(sizes_h, bool):
        hidden_sizes = [int(sizes_h)]
    else:
        raise TypeError('Invalid parameter: sizes_h must be an array or an int')

    layer_sizes = [size_x] + hidden_sizes + [size_y]

    parameters = {}
    # Walk consecutive (previous, current) layer pairs; index 0 is the input
    # layer, so parameter numbering starts at 1.
    for i in range(1, len(layer_sizes)):
        # Small random weights break the symmetry between units of a layer.
        parameters['W' + str(i)] = np.random.randn(layer_sizes[i], layer_sizes[i-1]) * 0.01
        # NOTE(review): biases are 1-D, shape (n,). If the forward pass adds
        # them to a (n, m) matrix, a column vector (n, 1) may be what is
        # needed for correct broadcasting -- confirm against the caller.
        parameters['b' + str(i)] = np.zeros(layer_sizes[i])

    return parameters

In [65]:
def activation_relu(Z):
    """ Rectified linear unit: element-wise max(0, Z).

    parameters
    ----------
    Z: numpy array
        Pre-activation values; any shape.

    returns
    -------
    numpy array:
        Activations with the same shape as Z, where every value below
        zero has been replaced by zero.
    """
    lower_bound = 0
    activated = np.maximum(lower_bound, Z)
    return activated

In [66]:
def activation_sigmoid(Z):
    """ Logistic sigmoid: 1 / (1 + exp(-Z)), applied element-wise.

    parameters
    ----------
    Z: numpy array
        Pre-activation values; any shape.

    returns
    -------
    numpy array:
        Activations with the same shape as Z.
    """
    decay = np.exp(-Z)
    denominator = 1 + decay
    return 1/denominator

In [62]:
def relu_derivative(dJ_dA, Z):
    """ Backward step through a ReLu unit.

    Applies the chain rule dJ/dZ = dJ/dA * dA/dZ, where dA/dZ = g'(Z)
    is 1 wherever Z > 0 and 0 everywhere else.

    parameters
    ----------
    dJ_dA: numpy array
        Gradient of the cost with respect to the activation (any shape).

    Z: numpy array
        Pre-activation values that were fed to the ReLu.

    returns
    -------
    numpy array:
        dJ_dZ, the gradient of the cost with respect to Z, with the same
        shape as dJ_dA.
    """
    is_active = Z > 0
    # g'(Z): pass the gradient through active units, block it elsewhere.
    local_slope = np.where(is_active, 1, 0)
    dJ_dZ = dJ_dA * local_slope

    # Broadcasting against Z must not have changed the gradient's shape.
    assert dJ_dZ.shape == dJ_dA.shape
    return dJ_dZ

In [67]:
def sigmoid_derivative(dJ_dA, Z):
    """ The gradient of a sigmoid unit during backpropagation.

    Formula for gradient: dJ/dZ = dJ/dA * dA/dZ
    where dA/dZ = g'(Z) = sigmoid(Z) * (1 - sigmoid(Z)).

    parameters
    ----------
    dJ_dA: numpy array
        The post-activation gradient (any shape).

    Z: numpy array
        Pre-activation values that were fed to the sigmoid.

    returns
    -------
    numpy array:
        The derivative dJ_dZ (i.e. gradient of the cost with respect to Z, dJ/dZ) of same shape as dJ_dA.

    """
    sigmoid = 1 / (1 + np.exp(-Z))
    # note: g'(Z) = derivative of sigmoid = `(sigmoid * (1 - sigmoid))`
    dJ_dZ = dJ_dA * (sigmoid * (1 - sigmoid))

    # Broadcasting against Z must not have changed the gradient's shape.
    assert dJ_dZ.shape == dJ_dA.shape
    # Return the computed gradient; a bare `return` here would hand None
    # back to the caller and silently drop the result.
    return dJ_dZ