In [1]:
# Needed packages:
import numpy as np
import h5py
import matplotlib.pyplot as plt   

In [2]:
def relu_backward(dA, Z):
    """
    Backward pass through a single ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the activation output
    Z -- pre-activation values that were fed to the ReLU in the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z (a fresh array; dA is not modified)
    """

    # Work on a private copy so the caller's dA stays intact.
    grad = np.array(dA, copy=True)

    # ReLU has zero slope wherever its input was non-positive,
    # so no gradient flows back through those entries.
    inactive = Z <= 0
    grad[inactive] = 0

    assert grad.shape == Z.shape

    return grad

In [3]:
def sigmoid_backward(dA, Z):
    """
    Backward pass through a single sigmoid activation.

    Arguments:
    dA -- gradient of the cost with respect to the activation output
    Z -- pre-activation values that were fed to the sigmoid in the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z
    """

    # Recompute the forward activation sigma(Z).
    sig = 1 / (1 + np.exp(-Z))

    # Chain rule: d(sigma)/dZ = sigma * (1 - sigma).
    scaled = dA * sig
    dZ = scaled * (1 - sig)

    assert dZ.shape == Z.shape

    return dZ

In [4]:
def linear_backward(dZ, cache):
    """
    Compute the linear part of the backward propagation for a single layer (layer l).

    Arguments:
        dZ: gradient of the cost function J wrt the linear output of layer [l]
     cache: tuple (A_prev, W, b) holding the layer input, weights and bias
            that were used in the forward pass of layer [l]

    Returns:
        dA_prev: gradient of the cost wrt activation A[l - 1]; same shape as A[l - 1]
             dW: gradient of the cost wrt weight W[l]; same shape as W[l]
             db: gradient of the cost wrt bias b[l]; same shape as b[l]
    """

    A_prev, W, b = cache

    # Examples are stacked along axis 1, so this is the batch size.
    m = A_prev.shape[1]

    dW = (1 / m) * np.dot(dZ, A_prev.T)
    # keepdims keeps db as a column vector matching b.
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    # Propagate the gradient to the previous layer through the weights.
    # This must use dZ (the function argument); no name `Z` exists in this scope.
    dA_prev = np.dot(W.T, dZ)

    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    assert (dA_prev.shape == A_prev.shape)

    return dA_prev, dW, db

In [5]:
def DeepNN_LinActivation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
                dA: post-activation gradient for current layer l
             cache: tuple (linear_cache, activation_cache) for layer l
        activation: name of the activation used in this layer, "sigmoid" or
                    "ReLu"; the name is matched case-insensitively

    Returns:
        dA_prev: gradient of the cost wrt activation A[l - 1]; same shape as A[l - 1]
             dW: gradient of the cost wrt weight W[l]; same shape as W[l]
             db: gradient of the cost wrt bias b[l]; same shape as b[l]

    Raises:
        ValueError: if `activation` is not a supported activation name
    """

    linear_cache, activation_cache = cache

    # Normalise the name so "relu", "ReLu" and "RELU" all select the same branch.
    key = activation.lower() if isinstance(activation, str) else activation

    if key == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    elif key == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        # Fail loudly here rather than with an UnboundLocalError further down.
        raise ValueError(
            "Unsupported activation {!r}; expected 'sigmoid' or 'ReLu'".format(activation)
        )

    # The linear step is identical for every activation.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [6]:
def deepNN_backward_model(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) times -> LINEAR -> SIGMOID group

    Arguments:
    AL: probability vector, output of the forward propagation
    Y: actual labels of the training examples (reshaped internally to AL's shape)
    caches -- list with one cache per layer, in forward order:
                caches[l] for l = 0...L-2 belong to the ReLU layers
                caches[L-1] belongs to the final sigmoid layer

    Returns:
    grads -- A dictionary with the gradients, where for l = 0...L-1
             grads["dA" + str(l)]     = gradient wrt the activation fed into layer l + 1
             grads["dW" + str(l + 1)] = gradient wrt the weights of layer l + 1
             grads["db" + str(l + 1)] = gradient wrt the bias of layer l + 1
    """

    grads = {}  # empty dictionary to contain the gradient values

    L = len(caches)  # number of layers
    Y = Y.reshape(AL.shape)  # to convert Y shape to be the same as AL

    # Derivative of the cross-entropy cost with respect to AL.
    # NOTE(review): this divides by AL and (1 - AL); entries that are exactly
    # 0 or 1 produce inf/nan here.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid): layer L, which uses caches[L - 1].
    current_cache = caches[L - 1]
    dA_prev_temp, dW_temp, db_temp = DeepNN_LinActivation_backward(dAL, current_cache, 'sigmoid')
    grads["dA" + str(L - 1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp

    # Hidden layers (ReLU), walked from layer L-1 down to layer 1.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = DeepNN_LinActivation_backward(grads["dA" + str(l + 1)], current_cache, 'ReLu')
        # caches[l] belongs to layer l + 1, hence the shifted index on dW/db.
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [None]:
def update_parameters(parameters, grads, alpha):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    parameters -- dictionary holding "W1", "b1", ..., "WL", "bL"
    grads -- dictionary holding the matching gradients "dW1", "db1", ..., "dWL", "dbL"
    alpha -- learning rate (step size)

    Returns:
    parameters -- the same dictionary, updated in place
    """

    # Each layer contributes two entries (W and b), so halve the key count.
    L = len(parameters) // 2

    for l in range(L):
        idx = str(l + 1)
        parameters["W" + idx] = parameters["W" + idx] - alpha * grads["dW" + idx]
        parameters["b" + idx] = parameters["b" + idx] - alpha * grads["db" + idx]

    return parameters