<a href="https://colab.research.google.com/github/EChin179/machinelearning/blob/master/Deep_Neural_Network_Setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [73]:
import time
import numpy as np
import scipy
from scipy import ndimage
import h5py # interact with a dataset on an H5 file
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image

# Notebook-wide setup: plotting defaults, module auto-reloading, and RNG seeding.

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
# Draw images without smoothing and default to a grayscale colormap.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# IPython autoreload extension; mode 2 is presumably "reload all modules
# before executing code" -- confirm against the IPython docs.
%load_ext autoreload
%autoreload 2

# Seed NumPy's global RNG so the np.random.randn draws used for weight
# initialization are reproducible from run to run.
np.random.seed(1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [0]:
""" FUNCTIONS """

# INITIALIZE PARAMETERS
def initialize_parameters(layer_dims):
    """Create the initial weights and biases for every layer of the network.

    Args:
        layer_dims: indexable sequence of layer sizes; entry 0 is the size of
            the input layer and each later entry is the size of one layer.

    Returns:
        dict with keys 'W1', 'b1', ..., one pair per layer after the input.
        'Wi' has shape (layer_dims[i], layer_dims[i-1]) and holds standard
        normal draws scaled by 0.01; 'bi' is a zero column vector of shape
        (layer_dims[i], 1).
    """
    parameters = {}

    for idx in range(1, len(layer_dims)):
        n_out, n_in = layer_dims[idx], layer_dims[idx - 1]

        # Small random weights; biases start at zero.
        weights = np.random.randn(n_out, n_in) * 0.01
        biases = np.zeros((n_out, 1))

        assert weights.shape == (n_out, n_in)
        assert biases.shape == (n_out, 1)

        parameters[f'W{idx}'] = weights
        parameters[f'b{idx}'] = biases

    return parameters

# 1 STEP OF FORWARD PROPAGATION 
def linear_activation_forward(A_prev, W, b, activation):
    """Run one LINEAR -> ACTIVATION forward step for a single layer.

    Args:
        A_prev: activations from the previous layer (or the input data);
            must be 2-D so that np.dot(W, A_prev) is defined.
        W: weight matrix of this layer.
        b: bias of this layer, broadcast-added to np.dot(W, A_prev).
        activation: 'sigmoid' or 'relu'.

    Returns:
        (A, cache) where A is the post-activation output with shape
        (W.shape[0], A_prev.shape[1]) and cache is the tuple
        (A_prev, W, b, Z) kept for the backward pass.

    Raises:
        ValueError: if activation is neither 'sigmoid' nor 'relu'.
    """
    # The linear part is the same for every activation.
    Z = np.dot(W, A_prev) + b

    if activation == 'sigmoid':
        A = 1 / (1 + np.exp(-Z))
    elif activation == 'relu':
        # np.maximum is element-wise. The builtin max() compares the whole
        # array to 0 and then needs a single truth value, which raises for
        # any array with more than one element.
        A = np.maximum(Z, 0)
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,))

    cache = (A_prev, W, b, Z)
    assert A.shape == (W.shape[0], A_prev.shape[1])

    return A, cache

# FORWARD PROPAGATION 
def L_model_forward(X, parameters):
    """Forward pass for [LINEAR -> RELU] * (L-1) followed by LINEAR -> SIGMOID.

    Args:
        X: input data; X.shape[1] is used as the number of examples.
        parameters: dict holding 'W1', 'b1', ..., 'WL', 'bL'.

    Returns:
        (AL, caches) where AL is the output of the final sigmoid layer, with
        shape (1, X.shape[1]), and caches is a list with one cache per layer
        in order, as returned by linear_activation_forward.
    """
    num_layers = len(parameters) // 2  # one W and one b per layer
    caches = []
    activation_out = X

    # Hidden layers 1 .. L-1 use ReLU.
    for idx in range(1, num_layers):
        activation_out, layer_cache = linear_activation_forward(
            activation_out,
            parameters['W' + str(idx)],
            parameters['b' + str(idx)],
            'relu',
        )
        caches.append(layer_cache)

    # The output layer L uses a sigmoid.
    AL, layer_cache = linear_activation_forward(
        activation_out,
        parameters['W' + str(num_layers)],
        parameters['b' + str(num_layers)],
        'sigmoid',
    )
    caches.append(layer_cache)

    assert AL.shape == (1, X.shape[1])
    return AL, caches

# COMPUTE COST 
def compute_cost(AL, Y):
    """Compute the mean binary cross-entropy cost.

    Args:
        AL: array of predicted probabilities.
        Y: array of true labels, broadcast-compatible with AL; Y.shape[1] is
            used as the number of examples m.

    Returns:
        The cost -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)), squeezed to a
        value with shape ().
    """
    m = Y.shape[1]

    # Keep the predictions strictly inside (0, 1). A saturated prediction of
    # exactly 0 or 1 would otherwise produce log(0) = -inf, and 0 * -inf is
    # NaN, which poisons the whole sum.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    cost = (-1 / m) * np.sum(Y * np.log(AL_safe) + (1 - Y) * np.log(1 - AL_safe))
    cost = np.squeeze(cost)

    assert cost.shape == ()
    return cost

# 1 STEP OF BACK PROPAGATION 
def linear_activation_backward(dA, cache, activation):
    """Run one backward step through a LINEAR -> ACTIVATION layer.

    Args:
        dA: gradient of the cost with respect to this layer's activation.
        cache: tuple (A_prev, W, b, Z) stored by the forward step.
        activation: 'relu' or 'sigmoid'.

    Returns:
        (dA_prev, dW, db): gradients with respect to the previous layer's
        activation, this layer's weights, and this layer's bias. Their shapes
        match A_prev, W and b respectively.

    Raises:
        ValueError: if activation is neither 'relu' nor 'sigmoid'.
    """
    A_prev, W, b, Z = cache
    m = A_prev.shape[1]  # averaging factor: number of columns of A_prev

    if activation == 'relu':
        # ReLU passes the gradient through where Z > 0 and blocks it
        # elsewhere. Work on a copy so the caller's dA is not modified.
        dZ = np.array(dA, copy=True)
        dZ[Z <= 0] = 0
    elif activation == 'sigmoid':
        s = 1 / (1 + np.exp(-Z))
        dZ = dA * s * (1 - s)
    else:
        # Without this branch dZ would be unbound and surface as an
        # UnboundLocalError a few lines further down.
        raise ValueError(
            "activation must be 'relu' or 'sigmoid', got %r" % (activation,))

    dW = (1 / m) * np.dot(dZ, A_prev.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)  # sum over all examples
    dA_prev = np.dot(W.T, dZ)

    assert dA_prev.shape == A_prev.shape
    assert dW.shape == W.shape
    assert db.shape == b.shape
    assert dZ.shape == Z.shape

    return dA_prev, dW, db

# BACK PROPAGATION 
def L_model_backward(AL, Y, caches):
    """Backward pass for [LINEAR -> RELU] * (L-1) followed by LINEAR -> SIGMOID.

    Args:
        AL: output of the forward pass (the final sigmoid activations).
        Y: true labels; reshaped to AL's shape before use.
        caches: list with one cache per layer, in forward order; the last
            entry belongs to the sigmoid layer and the rest to ReLU layers.

    Returns:
        dict of gradients with keys 'dA0'..'dA{L-1}', 'dW1'..'dW{L}' and
        'db1'..'db{L}', where L == len(caches).

    Note:
        The initial gradient divides by AL and by (1 - AL), so predictions
        that are exactly 0 or 1 cause a division by zero here.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)  # make Y the same shape as AL

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer L (sigmoid) starts the backward pass.
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, 'sigmoid')

    # Hidden layers L-1 down to 1 (ReLU). Layer l consumes dA{l}, which the
    # layer above it produced.
    for l in reversed(range(1, L)):
        current_cache = caches[l - 1]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads["dA" + str(l)], current_cache, 'relu')
        grads["dA" + str(l - 1)] = dA_prev_temp
        grads["dW" + str(l)] = dW_temp
        grads["db" + str(l)] = db_temp

    return grads

# UPDATE PARAMETERS
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: dict holding 'W1', 'b1', ..., 'WL', 'bL'. The dict is
            updated in place; each entry is rebound to a new array.
        grads: dict holding the matching 'dW1', 'db1', ..., 'dWL', 'dbL'.
        learning_rate: step size multiplied with each gradient.

    Returns:
        The same parameters dict, after the update.
    """
    num_layers = len(parameters) // 2  # one W and one b per layer

    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters