In [1]:
import numpy as np
from utils import *
from nn_building_blocks import *

### 1D Gradient Checking

In [2]:
def forward_propagation(x, theta):
    """Evaluate the 1-D toy cost J(theta) = x * theta.

    Arguments:
    x -- input value
    theta -- parameter value

    Returns:
    The product x * theta.
    """
    return x * theta

In [3]:
def backward_propagation(x, theta):
    """Return the analytic gradient used by the 1-D gradient check.

    Arguments:
    x -- input value
    theta -- parameter value (accepted but not used by the computation)

    Returns:
    x, unchanged.
    """
    dtheta = x
    return dtheta

In [4]:
def gradient_check(x, theta, epsilon=1e-7, print_msg=False):
    """Compare the analytic 1-D gradient with a two-sided finite difference.

    Arguments:
    x -- input passed to forward_propagation / backward_propagation
    theta -- parameter value at which the gradient is checked
    epsilon -- step added to and subtracted from theta for the numerical estimate
    print_msg -- when True, print a coloured verdict based on the 2e-7 threshold

    Returns:
    difference -- ||grad - gradapprox|| / (||grad|| + ||gradapprox||), or 0.0
                  when both gradients are exactly zero
    """
    # Two-sided estimate: (J(theta + eps) - J(theta - eps)) / (2 * eps).
    J_plus = forward_propagation(x, theta + epsilon)
    J_minus = forward_propagation(x, theta - epsilon)
    gradapprox = (J_plus - J_minus) / (2 * epsilon)

    grad = backward_propagation(x, theta)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)

    if denominator == 0:
        # Both gradients are exactly zero, so they agree. Without this guard
        # the ratio is 0/0 = NaN, which also slips past the threshold test below.
        difference = np.float64(0.0)
    else:
        difference = numerator / denominator

    if print_msg:
        if difference > 2e-7:
            print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
        else:
            print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference

### N-D Gradient Checking

In [5]:
def forward_propagation_n(X, Y, parameters):
    """Run the forward pass of the L-layer network and compute its cost.

    Layers 1..L-1 are evaluated with "relu" and layer L with "sigmoid", all
    through linear_activation_forward.

    Arguments:
    X -- network input; X.shape[1] is used as the number of examples m
    Y -- labels, combined element-wise with the last activation
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns:
    cost -- (1/m) * sum(-log(AL) * Y - log(1 - AL) * (1 - Y))
    caches -- flat tuple with four entries per layer, in layer order:
              (cache[1], A, cache[0][1], cache[0][2]) from the helper's cache
              (presumably Z, A, W, b -- confirm against linear_activation_forward)
    """
    num_examples = X.shape[1]
    num_layers = len(parameters) // 2
    flat_cache = []
    activation = X

    # Hidden layers 1 .. L-1.
    for layer in range(1, num_layers):
        suffix = str(layer)
        activation, layer_cache = linear_activation_forward(
            activation, parameters["W" + suffix], parameters["b" + suffix], "relu"
        )
        linear_part, activation_part = layer_cache[0], layer_cache[1]
        flat_cache.extend([activation_part, activation, linear_part[1], linear_part[2]])

    # Output layer L.
    suffix = str(num_layers)
    AL, layer_cache = linear_activation_forward(
        activation, parameters["W" + suffix], parameters["b" + suffix], "sigmoid"
    )
    linear_part, activation_part = layer_cache[0], layer_cache[1]
    flat_cache.extend([activation_part, AL, linear_part[1], linear_part[2]])

    # Cross-entropy cost, averaged over the examples.
    positive_term = np.multiply(-np.log(AL), Y)
    negative_term = np.multiply(-np.log(1 - AL), 1 - Y)
    cost = (1. / num_examples) * np.sum(positive_term + negative_term)

    return cost, tuple(flat_cache)

In [6]:
def backward_propagation_n(X, Y, cache):
    """Compute dW and db for every layer from the flat forward-pass cache.

    Arguments:
    X -- network input; X.shape[1] is used as the number of examples m
    Y -- labels; reshaped to the shape of the last activation
    cache -- flat sequence with four entries per layer, in layer order
             (Z_l, A_l, W_l, b_l), as built by forward_propagation_n

    Returns:
    grads -- dict with "dW1", "db1", ..., "dWL", "dbL"
    """
    grads = {}
    L = len(cache) // 4
    m = X.shape[1]

    AL = cache[-3]
    WL = cache[-2]
    Y = Y.reshape(AL.shape)

    # Input of the output layer: the previous layer's activation, or X itself
    # for a one-layer network (cache[-7] does not exist in that case).
    A_prev = cache[-7] if L > 1 else X

    # Output layer: dZ = AL - Y.
    dZL = AL - Y
    grads["dW" + str(L)] = (1 / m) * np.dot(dZL, A_prev.T)
    grads["db" + str(L)] = (1 / m) * np.sum(dZL, axis=1, keepdims=True)
    dA_prev = np.dot(WL.T, dZL)

    # Hidden layers, last to first. l is 0-based: layer l+1 occupies
    # cache[4*l] .. cache[4*l + 3].
    for l in reversed(range(L - 1)):
        Zl = cache[4 * l]
        Wl = cache[4 * l + 2]
        bl = cache[4 * l + 3]

        # Activation feeding layer l+1: A_l from the cache, or X for the first layer.
        A_prev = cache[4 * l - 3] if l > 0 else X

        dA_prev, dWl, dbl = linear_activation_backward(dA_prev, ((A_prev, Wl, bl), Zl), "relu")

        grads["dW" + str(l + 1)] = dWl
        grads["db" + str(l + 1)] = dbl

    return grads

In [7]:
def dictionary_to_vector(parameters):
    """Flatten a parameter dictionary into one column vector.

    Arguments:
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns:
    theta -- column vector of shape (n, 1): W1, b1, W2, b2, ... each reshaped
             to a column and stacked in that order
    keys -- list of length n naming the parameter each row of theta came from
    """
    layer_count = len(parameters) // 2
    ordered_names = [
        prefix + str(layer)
        for layer in range(1, layer_count + 1)
        for prefix in ('W', 'b')
    ]
    columns = [parameters[name].reshape(-1, 1) for name in ordered_names]

    # One key per element, so keys[i] identifies the owner of theta[i].
    keys = []
    for name, column in zip(ordered_names, columns):
        keys.extend([name] * column.shape[0])

    theta = np.concatenate(columns, axis=0)
    return theta, keys

In [8]:
def gradients_to_vector(gradients):
    """Flatten a gradient dictionary into one column vector.

    Arguments:
    gradients -- dict holding "dW1", "db1", ..., "dWL", "dbL"

    Returns:
    Column vector of shape (n, 1): dW1, db1, dW2, db2, ... each reshaped to a
    column and stacked in that order.
    """
    layer_count = len(gradients) // 2
    columns = [
        gradients[prefix + str(layer)].reshape(-1, 1)
        for layer in range(1, layer_count + 1)
        for prefix in ('dW', 'db')
    ]
    return np.concatenate(columns, axis=0)

In [9]:
def vector_to_dictionary(theta, shape_reference):
    """Rebuild a parameter dictionary from a flat vector.

    Arguments:
    theta -- flat vector laid out as W1, b1, W2, b2, ...
    shape_reference -- dict holding "W1", "b1", ..., whose .shape attributes
                       give the shape of each rebuilt array

    Returns:
    parameters -- dict with the same "Wl"/"bl" keys, each a reshaped slice of theta
    """
    parameters = {}
    cursor = 0
    for l in range(1, len(shape_reference) // 2 + 1):
        for prefix in ("W", "b"):
            key = prefix + str(l)
            shape = shape_reference[key].shape
            # Element count from the full shape, so arrays of any rank
            # (e.g. 1-D biases) are rebuilt, not just 2-D ones.
            size = int(np.prod(shape))
            parameters[key] = theta[cursor: cursor + size].reshape(shape)
            cursor += size

    return parameters

In [10]:
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7, print_msg=False):
    """Compare backprop gradients with two-sided finite differences of the cost.

    Each parameter is perturbed by +/- epsilon in turn and the cost is
    re-evaluated with forward_propagation_n.

    Arguments:
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"
    gradients -- dict holding "dW1", "db1", ..., "dWL", "dbL"
    X -- network input passed to forward_propagation_n
    Y -- labels passed to forward_propagation_n
    epsilon -- perturbation applied to one parameter at a time
    print_msg -- when True, print a coloured verdict based on the 2e-7 threshold

    Returns:
    difference -- ||grad - gradapprox|| / (||grad|| + ||gradapprox||), or 0.0
                  when both gradient vectors are exactly zero
    """
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)

    num_parameters = len(parameters_values)
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Perturb only entry i; copies keep the base vector untouched.
        theta_plus = np.copy(parameters_values)
        theta_plus[i] += epsilon
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(theta_plus, parameters))

        theta_minus = np.copy(parameters_values)
        theta_minus[i] -= epsilon
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(theta_minus, parameters))

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)

    if denominator == 0:
        # Both gradient vectors are exactly zero, so they agree. Without this
        # guard the ratio is 0/0 = NaN, which also slips past the threshold test.
        difference = np.float64(0.0)
    else:
        difference = numerator / denominator

    if print_msg:
        if difference > 2e-7:
            print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
        else:
            print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference