In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v4a import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline

In [2]:
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

In [3]:
def initialize_parameters_deep(layer_dims):
    np.random.seed(3)
    params = {}
    for i in range(len(layer_dims) - 1):
        params['W' + str(i + 1)] = np.random.rand(layer_dims[i + 1], layer_dims[i]) * 0.01
        params['b' + str(i + 1)] = np.zeros((layer_dims[i + 1], 1))
    return params

In [4]:
initialize_parameters_deep([5, 4, 3])

{'W1': array([[0.00550798, 0.00708148, 0.00290905, 0.00510828, 0.00892947],
        [0.00896293, 0.00125585, 0.00207243, 0.00051467, 0.0044081 ],
        [0.00029876, 0.00456833, 0.00649144, 0.00278487, 0.00676255],
        [0.00590863, 0.00023982, 0.00558854, 0.00259252, 0.00415101]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[0.00283525, 0.00693138, 0.00440454, 0.00156868],
        [0.00544649, 0.00780315, 0.00306364, 0.00221958],
        [0.00387971, 0.00936384, 0.00975995, 0.00672384]]),
 'b2': array([[0.],
        [0.],
        [0.]])}

In [5]:
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

In [6]:
A, W, b = linear_forward_test_case()
linear_forward(A, W, b)

(array([[ 3.26295337, -1.23429987]]),
 (array([[ 1.62434536, -0.61175641],
         [-0.52817175, -1.07296862],
         [ 0.86540763, -2.3015387 ]]),
  array([[ 1.74481176, -0.7612069 ,  0.3190391 ]]),
  array([[-0.24937038]])))

In [7]:
def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        A, activation_cache = relu(Z)
    assert(A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return A, cache

In [8]:
A_prev, W, b = linear_activation_forward_test_case()
A, cache = linear_activation_forward(A_prev, W, b, activation = "sigmoid")
print("A with sigmoid: " + str(A) + "\ncache: " + str(cache))
A, cache = linear_activation_forward(A_prev, W, b, activation = "relu")
print("A with relu: " + str(A) + "\ncache: " + str(cache))

A with sigmoid: [[0.96890023 0.11013289]]
cache: ((array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737]]), array([[ 0.50288142, -1.24528809, -1.05795222]]), array([[-0.90900761]])), array([[ 3.43896131, -2.08938436]]))
A with relu: [[3.43896131 0.        ]]
cache: ((array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737]]), array([[ 0.50288142, -1.24528809, -1.05795222]]), array([[-0.90900761]])), array([[ 3.43896131, -2.08938436]]))


In [9]:
def L_model_forward(X, params):
    A = X
    L = len(params) // 2
    cache = []
    for i in range(1, L):
        A, l_cache = linear_activation_forward(A, params['W' + str(i)], params['b' + str(i)], activation = "relu")
        cache.append(l_cache)
    A, l_cache = linear_activation_forward(A, params['W' + str(L)], params['b' + str(L)], activation = "sigmoid")
    cache.append(l_cache)
    return A, cache

In [10]:
X, params = L_model_forward_test_case_2hidden()
A, cache = L_model_forward(X, params)
print("Result is: " + str(np.squeeze(A)) + "\ncache is: " + str(len(cache)))


Result is: [0.03921668 0.70498921 0.19734387 0.04728177]
cache is: 3


In [11]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    first_sec = np.dot(Y, np.log(AL).T)  # Can also use np.sum() with np.multiply()
    second_sec = np.dot((1 - Y), np.log(1 - AL).T)
    cost = np.squeeze(-(first_sec + second_sec) / m)
    assert(cost.shape == ())
    return cost

In [12]:
Y, AL = compute_cost_test_case()
print("Cost is: " + str(compute_cost(AL, Y)))

Cost is: 0.2797765635793423


In [13]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis = 1, keepdims = True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

In [14]:
dZ, cache = linear_backward_test_case()
dA, dW, db = linear_backward(dZ, cache)
print("dA_prev: " + str(dA) + "\ndW: " + str(dW) + "\ndb: " + str(db))

dA_prev: [[-1.15171336  0.06718465 -0.3204696   2.09812712]
 [ 0.60345879 -3.72508701  5.81700741 -3.84326836]
 [-0.4319552  -1.30987417  1.72354705  0.05070578]
 [-0.38981415  0.60811244 -1.25938424  1.47191593]
 [-2.52214926  2.67882552 -0.67947465  1.48119548]]
dW: [[ 0.07313866 -0.0976715  -0.87585828  0.73763362  0.00785716]
 [ 0.85508818  0.37530413 -0.59912655  0.71278189 -0.58931808]
 [ 0.97913304 -0.24376494 -0.08839671  0.55151192 -0.10290907]]
db: [[-0.58855142]
 [-0.4525262 ]
 [-0.52836405]]


In [15]:
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [16]:
dA, l_a_cache = linear_activation_backward_test_case()
dA_prev, dW, db = linear_activation_backward(dA, l_a_cache, activation = "sigmoid")
print("Sigmoid:\ndA_prev: " + str(dA_prev) + "\ndW: " + str(dW) + "\ndb: " + str(db))
dA_prev, dW, db = linear_activation_backward(dA, l_a_cache, activation = "relu")
print("Relu:\ndA_prev: " + str(dA_prev) + "\ndW: " + str(dW) + "\ndb: " + str(db))

Sigmoid:
dA_prev: [[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]
dW: [[ 0.10266786  0.09778551 -0.01968084]]
db: [[-0.11459244]]
Relu:
dA_prev: [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]
dW: [[ 0.44513824  0.37371418 -0.10478989]]
db: [[-0.41675785]]


In [17]:
def L_model_backward(AL, Y, cache):
    L = len(cache)
    m = Y.shape[1]
    Y = Y.reshape(AL.shape)
    grad = {}
    dAL = -(np.divide(Y, AL) - np.divide((1 - Y), (1 - AL)))
    grad['dA' + str(L - 1)], grad['dW' + str(L)], grad['db' + str(L)] = \
    linear_activation_backward(dAL, cache[L - 1], activation = "sigmoid")
    for i in reversed(range(L - 1)):
        dA, dW, db = linear_activation_backward(grad['dA' + str(i + 1)], cache[i], activation = "relu")
        grad['dA' + str(i)], grad['dW' + str(i + 1)], grad['db' + str(i + 1)] = dA, dW, db
    return grad

In [18]:
AL, Y, cache = L_model_backward_test_case()
print_grads(L_model_backward(AL, Y, cache))

dW1 = [[0.41010002 0.07807203 0.13798444 0.10502167]
 [0.         0.         0.         0.        ]
 [0.05283652 0.01005865 0.01777766 0.0135308 ]]
db1 = [[-0.44014127]
 [ 0.        ]
 [-0.05670698]]
dA1 = [[ 0.12913162 -0.44014127]
 [-0.14175655  0.48317296]
 [ 0.01663708 -0.05670698]]


In [19]:
def update_parameters(params, grad, learning_rate):
    L = len(params) // 2
    for i in range(1, L + 1):
        params["W" + str(i)] = params["W" + str(i)] - learning_rate * grad["dW" + str(i)]
        params["b" + str(i)] = params["b" + str(i)] - learning_rate * grad["db" + str(i)]
    return params

In [20]:
params, grad = update_parameters_test_case()
update_parameters(params, grad, 0.1)

{'W1': array([[-0.59562069, -0.09991781, -2.14584584,  1.82662008],
        [-1.76569676, -0.80627147,  0.51115557, -1.18258802],
        [-1.0535704 , -0.86128581,  0.68284052,  2.20374577]]),
 'b1': array([[-0.04659241],
        [-1.28888275],
        [ 0.53405496]]),
 'W2': array([[-0.55569196,  0.0354055 ,  1.32964895]]),
 'b2': array([[-0.84610769]])}

In [21]:
def predict(X, y, parameters):
    
    m = X.shape[1]
    n = len(parameters) // 2
    p = np.zeros((1,m))
    
    probas, caches = L_model_forward(X, parameters)

    for i in range(0, probas.shape[1]):
        if probas[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    
    print("Accuracy: "  + str(np.sum((p == y)/m)))
        
    return p

In [22]:
def print_mislabeled_images(classes, X, y, p):
    a = p + y
    mislabeled_indices = np.asarray(np.where(a == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)
    num_images = len(mislabeled_indices[0])
    for i in range(num_images):
        index = mislabeled_indices[1][i]
        
        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:,index].reshape(64,64,3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0,index])].decode("utf-8") + " \n Class: " + classes[y[0,index]].decode("utf-8"))