### Implementation From Scratch of Deep N-Layer NN

In [5]:
import copy

import matplotlib.pyplot as plt
import numpy as np

from dnn_utils import sigmoid, relu, relu_backward, sigmoid_backward
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [4]:
# Notebook-wide matplotlib defaults: figure size, image interpolation mode
# and image colormap, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': (5.0, 4.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

----------------------------------------------------------
**Steps Needed to Implement**

1. Initialize Parameters (W, b)
2. Implement Forward Prop Layers in loop (N-Layers)
3. Compute Loss function
4. Carry out back prop to get new parameters by calculating derivatives at every layer
5. Update parameters using Gradient Descent
6. Make Prediction
 

In [17]:
# Step 1. Initialize parameters
def intialize_parameters(layers_dim):
    """Create small random weights and zero biases for every layer.

    Args:
        layers_dim: Sequence of layer sizes. ``layers_dim[0]`` is the input
            size and ``layers_dim[l]`` is the number of units in layer ``l``.

    Returns:
        Dict with keys ``"W1" .. "W{L-1}"`` and ``"b1" .. "b{L-1}"`` where
        ``L = len(layers_dim)``. ``W{l}`` has shape
        ``(layers_dim[l], layers_dim[l-1])`` and ``b{l}`` has shape
        ``(layers_dim[l], 1)``. Empty when fewer than two sizes are given.
    """
    parameters = {}
    num_dims = len(layers_dim)  # input layer plus every hidden/output layer
    # Start at 1: index 0 is the input layer and owns no weights. Starting at
    # 0 would pair layer 0 with layers_dim[-1] and never create the
    # parameters of the last layer.
    for l in range(1, num_dims):
        parameters["W" + str(l)] = np.random.randn(layers_dim[l], layers_dim[l - 1]) * 0.01
        parameters["b" + str(l)] = np.zeros((layers_dim[l], 1))

    return parameters

In [20]:
# Step2. Implement Forward prop - linear part
def linear_forward_prop(A,W,b):
    """Compute the linear part of a layer's forward pass, ``Z = W . A + b``.

    Args:
        A: Input to the layer; its row count must equal the column count of W.
        W: Weight matrix of the layer.
        b: Bias, broadcast-added to the product.

    Returns:
        Tuple ``(Z, cache)`` where ``cache`` is ``(A, W, b)``, kept for the
        backward pass.
    """
    # W goes first: the backward pass computes dW = dZ . A_prev.T and
    # dA_prev = W.T . dZ, which only matches Z = W . A + b.
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

In [22]:
# Step2. Implement Forward prop - non linear (activation) part
def single_layer_forward_prop(A_prev, W, b, activation):
    """Run one layer forward: the linear step followed by the activation.

    Args:
        A_prev: Input passed to ``linear_forward_prop`` for this layer.
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation: ``'sigmoid'`` or ``'relu'``.

    Returns:
        Tuple ``(A, cache)`` where ``A`` is the activation output and
        ``cache`` is ``(linear_cache, activation_cache)`` for the backward
        pass.

    Raises:
        ValueError: If ``activation`` is not ``'sigmoid'`` or ``'relu'``.
    """
    # Pick the activation first so an unsupported name fails with a clear
    # error instead of an unbound-variable error after the branches.
    if activation == 'sigmoid':
        activation_fn = sigmoid  # sigmoid is implemented in dnn_utils.py
    elif activation == 'relu':
        activation_fn = relu  # relu is implemented in dnn_utils.py
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation)
        )

    Z, linear_cache = linear_forward_prop(A_prev, W, b)
    A, activation_cache = activation_fn(Z)
    cache = (linear_cache, activation_cache)
    return A, cache

In [23]:
def L_layer_forward_prop(X, parameters):
    """Run the full forward pass through every layer of the network.

    With ``L = len(parameters) // 2``, layers ``1 .. L-1`` are evaluated with
    the ``'relu'`` activation and layer ``L`` with ``'sigmoid'``.

    Args:
        X: Input fed to the first layer.
        parameters: Dict holding ``"W{l}"`` and ``"b{l}"`` for ``l = 1 .. L``.

    Returns:
        Tuple ``(AL, caches)``: the output of the last layer and the list of
        per-layer caches in layer order.
    """
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers: each one consumes the previous layer's activation.
    for layer in range(1, num_layers):
        weights = parameters['W' + str(layer)]
        bias = parameters['b' + str(layer)]
        activation, layer_cache = single_layer_forward_prop(activation, weights, bias, 'relu')
        caches.append(layer_cache)

    # Output layer.
    out_weights = parameters['W' + str(num_layers)]
    out_bias = parameters['b' + str(num_layers)]
    AL, out_cache = single_layer_forward_prop(activation, out_weights, out_bias, 'sigmoid')
    caches.append(out_cache)

    return AL, caches
        

In [24]:
# Step 3. Compute Cost Function (cross-entropy in this case)
def computer_cost(AL,Y):
    """Return the mean binary cross-entropy between predictions and labels.

    Computes ``-(1/m) * sum(Y * log(AL) + (1 - Y) * log(1 - AL))`` with
    ``m = Y.shape[1]``.

    Args:
        AL: Predicted probabilities, same shape as ``Y``. Values of exactly
            0 or 1 make the logarithm infinite; no clipping is applied.
        Y: Ground-truth labels with the examples along axis 1.

    Returns:
        The cost as a zero-dimensional value (extra axes squeezed away).
    """
    m = Y.shape[1]  # total examples
    log_likelihood = np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))
    # Cross-entropy is the negated average log-likelihood over all examples.
    cost = -np.sum(log_likelihood) / m
    return np.squeeze(cost)
    
    

In [26]:
# Step 4. Back Prop - calculate gradient of loss func wrt parameters
def linear_backward(dZ,cache):
    """Backward pass through the linear part of one layer.

    Args:
        dZ: Gradient of the cost with respect to this layer's linear output.
        cache: Tuple ``(A_prev, W, b)`` saved by the forward pass.

    Returns:
        Tuple ``(dA_prev, dW, db)``:
        ``dA_prev = W.T . dZ``, ``dW = dZ . A_prev.T / m`` and
        ``db = sum(dZ, axis=1, keepdims=True) / m``, where ``m`` is the
        number of columns of ``A_prev`` (presumably the number of examples).
    """
    prev_activation, weights, _bias = cache
    num_columns = prev_activation.shape[1]

    # Gradient flowing back to the previous layer's activation.
    grad_prev_activation = np.dot(weights.T, dZ)

    # Parameter gradients, averaged over the columns of A_prev.
    summed_dZ = np.sum(dZ, axis=1, keepdims=True)
    grad_bias = summed_dZ / num_columns
    grad_weights = np.dot(dZ, prev_activation.T) / num_columns

    return grad_prev_activation, grad_weights, grad_bias

In [27]:
def linear_activation_backward(dA,cache,activation):
    """Backward pass through one layer: activation first, then linear part.

    Args:
        dA: Gradient of the cost with respect to this layer's activation.
        cache: Tuple ``(linear_cache, activation_cache)`` saved by the
            forward pass for this layer.
        activation: ``'relu'`` or ``'sigmoid'``; must match the activation
            used in the forward pass.

    Returns:
        Tuple ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises:
        ValueError: If ``activation`` is not ``'relu'`` or ``'sigmoid'``.
    """
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Fail loudly rather than hitting an unbound variable at the return.
        raise ValueError(
            "activation must be 'relu' or 'sigmoid', got {!r}".format(activation)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [28]:
def L_layer_backward_prop(AL,Y,caches):
    """Run the backward pass through every layer and collect the gradients.

    Args:
        AL: Output of the forward pass (result of the final sigmoid layer).
        Y: Ground-truth labels; reshaped to ``AL.shape`` before use.
        caches: List of per-layer caches from the forward pass, in layer
            order; ``caches[l]`` belongs to layer ``l + 1``.

    Returns:
        Dict ``grads`` with, for every layer ``l = 1 .. L``
        (``L = len(caches)``): ``grads["dW{l}"]``, ``grads["db{l}"]`` and
        ``grads["dA{l-1}"]`` (the gradient with respect to that layer's
        input).
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output (sigmoid) layer. Its results must be stored: the loop below
    # reads grads["dA" + str(L - 1)] as the starting gradient, and the
    # parameter update needs dW{L} / db{L}.
    current_cache = caches[L - 1]
    dA_prev, dW, db = linear_activation_backward(dAL, current_cache, 'sigmoid')
    grads["dA" + str(L - 1)] = dA_prev
    grads["dW" + str(L)] = dW
    grads["db" + str(L)] = db

    # Hidden (relu) layers, from layer L-1 down to layer 1.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        # caches[l] is layer l+1, whose output gradient was stored as
        # "dA{l+1}" by the layer above it.
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, 'relu')
        grads["dA" + str(l)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db

    return grads
    

In [29]:
# Step 5. Update parameters
def update_parameters(params, grads, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    Args:
        params: Dict holding ``"W{l}"`` and ``"b{l}"`` for ``l = 1 .. L``
            where ``L = len(params) // 2``. It is not modified.
        grads: Dict holding ``"dW{l}"`` and ``"db{l}"`` for the same layers.
        learning_rate: Step size multiplied with every gradient.

    Returns:
        A new dict with ``W{l} - learning_rate * dW{l}`` and
        ``b{l} - learning_rate * db{l}`` for every layer.
    """
    # Work on a deep copy so the caller's arrays are left untouched.
    parameters = copy.deepcopy(params)
    L = len(parameters) // 2

    for l in range(1, L + 1):
        layer = str(l)
        # Rebind instead of using += so integer-typed arrays do not fail on
        # the in-place float update.
        parameters['W' + layer] = parameters['W' + layer] - learning_rate * grads['dW' + layer]
        parameters['b' + layer] = parameters['b' + layer] - learning_rate * grads['db' + layer]
    return parameters