In [8]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v4a import *
from dnn_utils_v2 import sigmoid, relu
import scipy
from PIL import Image
from scipy import ndimage
from dnn_app_utils_v3 import *


# Render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
# Show images without pixel interpolation and with a grayscale colormap by default.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load IPython's autoreload extension and enable mode 2 (reload imported
# modules before each cell runs). Re-running %load_ext in a live kernel only
# prints an "already loaded" notice that points at %reload_ext.
%load_ext autoreload
%autoreload 2

# Seed NumPy's global random number generator.
np.random.seed(1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Load the train/test splits and the class names.
# load_data() opens 'datasets/train_catvnoncat.h5' relative to the working
# directory and raises OSError when that file is missing.
# NOTE(review): load_data is presumably provided by the dnn_app_utils_v3 star import -- confirm.
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

OSError: Unable to open file (unable to open file: name = 'datasets/train_catvnoncat.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [2]:
# GRADED FUNCTION: initialize_parameters_deep

def initialize_parameters_deep(layer_dims):
    """
    Build the initial weights and biases for every layer of a deep network.

    Arguments:
    layer_dims -- python array (list) with the number of units in each layer, input layer first

    Returns:
    parameters -- python dictionary with keys "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1]),
                          standard-normal draws scaled by 0.01
                    bl -- bias vector of shape (layer_dims[l], 1), filled with zeros
    """

    np.random.seed(3)  # fixed seed: repeated calls produce identical weights
    parameters = {}

    # Iterate over consecutive (previous layer size, current layer size) pairs.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_prev, n_curr) in enumerate(size_pairs, start=1):
        w_key = 'W' + str(idx)
        b_key = 'b' + str(idx)

        parameters[w_key] = np.random.randn(n_curr, n_prev) * 0.01
        parameters[b_key] = np.zeros((n_curr, 1))

        # Sanity-check the shapes that were just created.
        assert parameters[w_key].shape == (n_curr, n_prev)
        assert parameters[b_key].shape == (n_curr, 1)

    return parameters

In [None]:
def forward_propagation(X, parameters):
    """
    Forward pass of an L-layer network:
    (L-1) x [LINEAR -> RELU] followed by LINEAR -> SIGMOID.

    Arguments:
    X -- input data; its first dimension must match the number of columns of parameters["W1"]
    parameters -- python dictionary containing "W1", "b1", ..., "WL", "bL"

    Returns:
    AL -- activations of the final (sigmoid) layer
    cache -- python dictionary containing "Z1", "A1", ..., "ZL", "AL" style entries,
             i.e. the pre-activation "Z<l>" and activation "A<l>" of every layer l = 1..L
    """

    L = len(parameters) // 2  # each layer contributes one W and one b entry
    cache = {}

    A_prev = X

    # Hidden layers 1 .. L-1: LINEAR -> RELU
    for l in range(1, L):
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]

        Z = np.dot(W, A_prev) + b
        A_prev = np.maximum(0, Z)  # RELU function

        cache['Z' + str(l)] = Z
        cache['A' + str(l)] = A_prev

    # Output layer L: LINEAR -> SIGMOID
    W = parameters['W' + str(L)]
    b = parameters['b' + str(L)]
    Z = np.dot(W, A_prev) + b
    AL = 1 / (1 + np.exp(-Z))  # Sigmoid function

    cache['Z' + str(L)] = Z
    cache['A' + str(L)] = AL

    return AL, cache

In [16]:
# GRADED FUNCTION: compute_cost

def compute_cost(AL, Y):
    """
    Compute the cross-entropy cost averaged over the examples.

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost (scalar with shape ())
    """

    m = Y.shape[1]

    # Keep the predictions strictly inside (0, 1). A saturated sigmoid output of
    # exactly 0 or 1 would otherwise give log(0) = -inf, and the product
    # 0 * -inf turns the whole cost into NaN. Values further than eps from the
    # boundaries are left untouched.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    J = np.multiply(np.log(AL_safe), Y) + np.multiply(np.log(1 - AL_safe), (1 - Y))
    cost = -(1/m)*np.sum(J)

    cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    assert(cost.shape == ())

    return cost

In [None]:
def backward_propagation(parameters, cache, X, Y):
    """
    Backward pass for an L-layer network made of (L-1) RELU layers followed by
    a SIGMOID output layer trained with the cross-entropy cost.

    Arguments:
    parameters -- python dictionary containing our parameters "W1", "b1", ..., "WL", "bL"
    cache -- a dictionary containing "Z1", "A1", ..., "ZL", "AL" entries from the forward pass;
             it is only read, never modified
    X -- input data of shape (input size, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing the gradients "dW<l>", "db<l>" and "dZ<l>"
             for every layer l = 1..L
    """

    m = X.shape[1]

    L = len(parameters) // 2  # each layer contributes one W and one b entry
    grads = {}

    def activation(layer):
        # Layer 0 is the input itself; resolving it here avoids writing an
        # extra "A0" entry into the caller's cache.
        return X if layer == 0 else cache['A' + str(layer)]

    # Output layer: for sigmoid + cross-entropy, dZ simplifies to AL - Y.
    dZ = cache['A' + str(L)] - Y
    A_prev = activation(L - 1)

    grads["dW" + str(L)] = (1/m)*np.dot(dZ, A_prev.T)
    grads["db" + str(L)] = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    grads["dZ" + str(L)] = dZ

    for l in range(L-1, 0, -1):  # hidden layers, from last to first
        A = cache['A' + str(l)]
        W_next = parameters['W' + str(l+1)]
        dZ_next = grads["dZ" + str(l+1)]
        A_prev = activation(l - 1)

        # RELU derivative: 1 where the unit was active, 0 elsewhere.
        # A is the RELU output, so it is never negative; the test has to be
        # strictly "> 0" (equivalent to Z > 0), otherwise every unit would be
        # treated as active and the gradient would not be gated at all.
        relu_mask = A > 0

        dZ = np.dot(W_next.T, dZ_next) * relu_mask
        grads["dW" + str(l)] = (1/m)*np.dot(dZ, A_prev.T)
        grads["db" + str(l)] = (1/m)*np.sum(dZ, axis=1, keepdims=True)
        grads["dZ" + str(l)] = dZ

    return grads

In [20]:
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every weight matrix and bias vector.

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL"
    grads -- python dictionary containing your gradients "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- the same dictionary object, with each entry rebound to
                  parameters[key] - learning_rate * grads["d" + key]
    """

    num_layers = len(parameters) // 2  # one W and one b entry per layer

    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            step = learning_rate * grads["d" + key]
            # Rebind the entry to a new array; the previous array is left untouched.
            parameters[key] = parameters[key] - step

    return parameters