# 1. Packages

In [10]:
'''
numpy - scientific computing with Python
matplotlib - plot graphs in Python
h5py - interact with a dataset that is stored on an H5 file
'''
import numpy as np
import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
# plt.rcParams['image.interpolation'] = 'nearest'
# plt.rcParams['image.cmap'] = 'gray'
import h5py

# sklearn provides simple and efficient tools for data mining and data analysis.
import sklearn
import sklearn.datasets
import sklearn.linear_model

from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline

# For testing: fix the global NumPy seed and pull in the test-case helpers
np.random.seed(1)
from testCases_v4 import *

# Building a Deep Neural Network

## Initialization

In [7]:
def initialize_parameters(layer_dims):
    """
    Create the initial weights and biases for every layer of the network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero. The global NumPy random state is reset to seed 3 on
    every call, so repeated calls with the same layer_dims return identical
    values.

    Arguments:
    layer_dims -- python list of layer sizes, input layer first

    Returns:
    parameters -- python dictionary with keys 'W1', 'b1', ..., 'WL', 'bL'
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """

    # Re-seeding here makes the initialization reproducible, at the cost of
    # overwriting whatever global random state the caller had.
    np.random.seed(3)

    parameters = {}

    # Walk consecutive (previous size, current size) pairs; layers are numbered from 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (n_in, n_out) in enumerate(size_pairs, start=1):
        parameters['W' + str(layer)] = np.random.randn(n_out, n_in) * 0.01
        parameters['b' + str(layer)] = np.zeros((n_out, 1))

    return parameters

## Forward Propagation

In [14]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Forward propagation of one layer, including linear and activation.
    
    Computes Z = W . A_prev + b and then applies the requested activation
    element-wise to Z.
    
    Arguments:
    A_prev -- activations of previous layer (or dataset) of shape (size of previous layer, number of examples)
    W -- weights matrix of shape (size of current layer, size of previous layer)
    b -- bias vector of shape (size of current layer, 1)
    activation -- text string, "sigmoid" or "relu"
    
    Returns:
    A -- activation values of shape (size of current layer, number of examples)
    cache -- tuple (A, Z, W, b); note that the first entry is this layer's
             output A, not the input A_prev
    
    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    
    Z = np.dot(W, A_prev) + b
    
    if activation == "sigmoid":
        A = 1 / (1 + np.exp(-Z))
    
    elif activation == "relu":
        A = np.maximum(0, Z)
    
    else:
        # Without this branch A would be unbound below and the caller would
        # get an UnboundLocalError that hides the real problem.
        raise ValueError(
            'activation must be "sigmoid" or "relu", got {!r}'.format(activation)
        )
    
    cache = (A, Z, W, b)
    return A, cache


def forward_propagation(X, parameters):
    """
    Run the full forward pass: [LINEAR -> RELU]*(L-1) -> [LINEAR -> SIGMOID].

    The number of layers L is taken as half the number of entries in
    parameters (one W and one b per layer).

    Arguments:
    X -- the input dataset of size (size of input layer, number of examples)
    parameters -- dictionary containing Wl and bl for l = 1..L

    Returns:
    AL -- last post-activation value of shape (size of output layer, number of examples)
    caches -- list with one cache per layer in layer order (index 0 holds
              layer 1, index L-1 holds layer L), total length L
    """

    depth = len(parameters) // 2

    # Per-layer schedule: every hidden layer uses ReLU, the final layer uses sigmoid.
    schedule = [(idx, "relu") for idx in range(1, depth)]
    schedule.append((depth, "sigmoid"))

    caches = []
    current = X
    for idx, kind in schedule:
        weights = parameters['W' + str(idx)]
        bias = parameters['b' + str(idx)]
        current, layer_cache = linear_activation_forward(current, weights, bias, kind)
        caches.append(layer_cache)

    # After the last scheduled layer, `current` is the network output AL.
    return current, caches