In [1]:
import numpy as np
import sys
sys.path.append('../activation_function/')
from activation_function import sigmoid_backward, relu_backward

### Steps breakdown:

1. LINEAR backward
2. LINEAR -> ACTIVATION backward where ACTIVATION computes the derivative of either the ReLU or sigmoid activation
3. LINEAR -> RELU -> LINEAR -> SIGMOID backward

### LINEAR backward
For layer $l$, the linear part is: $Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$ (followed by an activation).

Suppose you have already calculated the derivative $dZ^{[l]} = \frac{\partial J}{\partial Z^{[l]}}$. You want to get $(dW^{[l]}, db^{[l]}, dA^{[l-1]})$.

In [2]:
def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """

    # b is unpacked only to validate the cache layout; its value is not needed here.
    A_prev, W, b = cache

    # Averaging factor: number of columns of A_prev (columns are assumed to
    # index training examples). Derived locally so the function does not
    # depend on a global `m` left over in the kernel.
    m = A_prev.shape[1]

    # dW and db are averaged over the m columns.
    dW = (1 / m) * np.dot(dZ, A_prev.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)

    # dA_prev stays per-example: scaling it by 1/m here would shrink the
    # gradient by an extra factor of m at every layer it is propagated through.
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

### LINEAR - ACTIVATION backward
$$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]})$$

In [3]:
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """

    linear_cache, activation_cache = cache

    # Activation step: turn dA into dZ using the derivative of the chosen activation.
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        # Fail with a clear message instead of an UnboundLocalError at the return.
        raise ValueError(
            "Unsupported activation {!r}: expected 'sigmoid' or 'relu'".format(activation)
        )

    # Linear step is identical for every activation, so it is done once here.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

### Model backward

In [5]:
def model_backward(AL, Y, caches):
    """
    Implement the backward propagation for a stack of LINEAR->RELU layers
    followed by a final LINEAR->SIGMOID layer.

    Arguments:
    AL -- probability vector, output of the forward propagation
    Y -- true "label" vector (containing 0 if day, 1 if night); reshaped to AL's shape
    caches -- non-empty list of per-layer caches in forward order:
                caches[0] .. caches[L-2] are back-propagated with "relu"
                caches[L-1] (the last one) is back-propagated with "sigmoid"

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] for l = 0 .. L-1
             grads["dW" + str(l)] for l = 1 .. L
             grads["db" + str(l)] for l = 1 .. L

    Raises:
    ValueError -- if caches is empty
    """

    L = len(caches)  # no. of layers
    if L == 0:
        # Without this check an empty list fails later with an opaque IndexError.
        raise ValueError("caches must contain at least one layer cache")

    grads = {}
    Y = Y.reshape(AL.shape)

    # Initialize backpropagation by finding dAL
    # NOTE(review): entries of AL equal to exactly 0 or 1 divide by zero here.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid): dA is stored under the previous layer's index.
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, 'sigmoid')

    # Hidden layers (relu), walked from the last hidden layer back to the first.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads['dA' + str(l + 1)], current_cache, 'relu')
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads