In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v2 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline

np.random.seed(1)

In [40]:
def initialize_parameters(*layer):
    np.random.seed(1)
    w_list = list(zip(layer[1:], layer[:-1]))
    b_list = layer[1:]
    parameters = {}
    for i in range(len(w_list)):
        wkey = "W" + str(i+1)
        bkey = "b" + str(i+1)
        parameters[wkey] = np.random.randn(*w_list[i]) * 0.01
        parameters[bkey] = np.zeros(w_list[i])
    return(parameters)

In [41]:
print(initialize_parameters(2, 2, 1))

{'W1': array([[ 0.01624345, -0.00611756],
       [-0.00528172, -0.01072969]]), 'b1': array([[0., 0.],
       [0., 0.]]), 'W2': array([[ 0.00865408, -0.02301539]]), 'b2': array([[0., 0.]])}


In [44]:
def initialize_parameters_deep(layer_dims):
    np.random.rand(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(L):
        parameters["W"+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters["b"+str(l)] = np.zeros((layer_dims[l], 1))
        
    assert(parameters["W"+str(l)].shape == (layer_dims[l], layer_dims[l-1]))
    assert(parameters["b"+str(l)].shape == (layer_dims[l], 1))
    
    return parameters

In [45]:
print(initialize_parameters_deep([5, 4, 3]))

{'W0': array([[ 0.00346859,  0.0051293 , -0.00298093],
       [ 0.00488518, -0.00075572,  0.01131629],
       [ 0.01519817,  0.02185575, -0.01396496],
       [-0.01444114, -0.00504466,  0.00160037],
       [ 0.00876169,  0.00315635, -0.02022201]]), 'b0': array([[0.],
       [0.],
       [0.],
       [0.],
       [0.]]), 'W1': array([[-0.00306204,  0.00827975,  0.00230095,  0.00762011, -0.00222328],
       [-0.00200758,  0.00186561,  0.00410052,  0.001983  ,  0.00119009],
       [-0.00670662,  0.00377564,  0.00121821,  0.01129484,  0.01198918],
       [ 0.00185156, -0.00375285, -0.0063873 ,  0.00423494,  0.0007734 ]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[-0.00343854,  0.00043597, -0.00620001,  0.00698032],
       [-0.00447129,  0.01224508,  0.00403492,  0.00593579],
       [-0.01094912,  0.00169382,  0.00740556, -0.00953701]]), 'b2': array([[0.],
       [0.],
       [0.]])}


In [73]:
def sigmoid(Z):
    A = 1. / (1 + np.exp(-1*Z))
    cache = Z
    return A, cache

In [74]:
def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

In [75]:
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

In [76]:
def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
        
    cache = (linear_cache, activation_cache)
    return A, cache

In [77]:
def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev,
                                             parameters["W"+str(l)],
                                             parameters["b"+str(l)],
                                             activation='relu')
        caches.append(cache)

    AL, cache = linear_activation_forward(A,
                                          parameters["W"+str(L)],
                                          parameters["b"+str(L)],
                                          activation='sigmoid')
    caches.append(cache)
    
    return AL, caches

In [78]:
X, parameters = L_model_forward_test_case()
AL, caches = L_model_forward(X, parameters)
print("AL = " + str(AL))
print("Length of caches list = " + str(len(caches)))

AL = [[0.17007265 0.2524272 ]]
Length of caches list = 2


In [81]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = (-1. / m) * np.sum(np.multiply(Y, np.log(AL) + np.multiply((1-Y), np.log(1-AL))))
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost

In [82]:
Y, AL = compute_cost_test_case()
print("cost = " + str(compute_cost(AL, Y)))

cost = 0.41493159961539694


In [85]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    
    dW = (1./m) * np.dot(dZ, cache[0].T)
    db = (1./m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(cache[1].T, dZ)
    
    return dA_prev, dW, db

In [86]:
# Set up some test inputs
dZ, linear_cache = linear_backward_test_case()

dA_prev, dW, db = linear_backward(dZ, linear_cache)
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db))

dA_prev = [[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]]
dW = [[-0.10076895  1.40685096  1.64992505]]
db = [[0.50629448]]


In [88]:
def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[ Z <= 0] = 0
    
    assert (dZ.shape == Z.shape)
    return dZ

In [89]:
def sigmoid_backward(dA, cache):
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1-s)
    
    assert (dZ.shape == Z.shape)
    return dZ

In [92]:
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db    

In [93]:
AL, linear_activation_cache = linear_activation_backward_test_case()

dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "sigmoid")
print ("sigmoid:")
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db) + "\n")

dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "relu")
print ("relu:")
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db))

sigmoid:
dA_prev = [[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]
dW = [[ 0.10266786  0.09778551 -0.01968084]]
db = [[-0.05729622]]

relu:
dA_prev = [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]
dW = [[ 0.44513824  0.37371418 -0.10478989]]
db = [[-0.20837892]]
