In [None]:
def backward_propagation_step(dL_dA, cache, activation):
    """
    Run backpropagation through one layer: first the activation, then the
    linear part.

    dL_dA - activation gradient for current layer l, must have the same shape as Z
    cache - (W, b, A_prev, Z) stored for current layer  l
    activation - string: "sigmoid" or "relu"

    Returns:
    dL_dA_prev - Gradient activation of the previous layer l-1, same shape as A_prev
    dL_dW - Gradient of W current layer l, same shape as W
    dL_db - Gradient of b (current layer l), same shape as b

    Raises:
    ValueError - if activation is neither "relu" nor "sigmoid"
    AssertionError - if one of the shape checks fails
    """
    W, b, A_prev, Z = cache

    # backward activation part: element-wise derivative of the activation at Z
    if activation == "relu":
        dg_dz = relu_backward(Z)
    elif activation == "sigmoid":
        dg_dz = sigmoid_backward(Z)
    else:
        # Fail fast with a clear message instead of reaching the code below
        # with dg_dz never assigned.
        raise ValueError(
            'Unknown activation {!r}: expected "relu" or "sigmoid"'.format(activation)
        )

    # Shapes must match exactly so the product below is truly element-wise
    # and never silently broadcast.
    assert (dL_dA.shape == dg_dz.shape)
    dL_dZ = dL_dA * dg_dz

    # backward linear part:
    # The gradients of W and b are scaled by 1 / (number of columns of A_prev);
    # presumably each column is one training example - confirm with the caller.
    m = A_prev.shape[1]
    dL_dW = 1 / m * np.dot(dL_dZ, A_prev.T)
    # keepdims=True keeps dL_db as a column so it can match the shape of b.
    dL_db = 1 / m * np.sum(dL_dZ, axis=1, keepdims=True)
    dL_dA_prev = np.dot(W.T, dL_dZ)

    assert (dL_dA_prev.shape == A_prev.shape)
    assert (dL_dW.shape == W.shape)
    assert (dL_db.shape == b.shape)

    return dL_dA_prev, dL_dW, dL_db
        

def relu_backward(Z):
    """
    Element-wise derivative of ReLU evaluated at Z.

    Z - numpy array

    Returns:
    Integer array with the same shape as Z: 1 where Z > 0, 0 everywhere else
    (including at exactly 0).
    """
    is_positive = Z > 0
    grad = np.where(is_positive, 1, 0)
    assert (grad.shape == Z.shape)
    return grad



def sigmoid_backward(Z):
    """
    Element-wise derivative of the sigmoid activation evaluated at Z.

    Z - input values; must expose .shape (presumably a numpy array)

    Returns:
    dg_dz - sigmoid(Z) * (1 - sigmoid(Z)), same shape as Z
    """
    # Evaluate sigmoid once and reuse the result; this assumes the external
    # sigmoid helper is a pure function of Z.
    s = sigmoid(Z)
    dg_dz = s * (1 - s)
    assert (dg_dz.shape == Z.shape)
    return dg_dz


