## Imports

In [1]:
import numpy as np

## Activation functions

In [None]:
    """
    Calculates and returns the depth of the tree
        Parameters:
            tree (<DecisionTreeClassifier> object): tree object to draw (optional)
        Returns:
            (int): depth of the tree
    """

In [35]:
def relu(Z):
    """
    Applies the ReLU (Rectified Linear Unit) activation element-wise to Z.
        Parameters:
            Z (<numpy.ndarray>): pre-activation values
        Returns:
            A (<numpy.ndarray>): element-wise maximum of 0 and Z
            cache (<numpy.ndarray>): the input Z itself (kept for backward propagation)
    """
    activated = np.maximum(0, Z)

    # The untouched input is handed back alongside the activation so the
    # caller can store it for the backward pass.
    return activated, Z


def sigmoid(Z):
    """
    Computes sigmoid activation on Z.
        Parameters:
            Z (<numpy.ndarray>): pre-activation values
        Returns:
            A (<numpy.ndarray>): sigmoid of Z, i.e. 1 / (1 + exp(-Z)), element-wise
            cache (<numpy.ndarray>): input (for backward propagation)
    """
    # 1 / (1 + exp(-Z)) == exp(-log(1 + exp(-Z))).
    # np.logaddexp(0, -Z) evaluates log(exp(0) + exp(-Z)) without ever
    # materialising exp(-Z), so very negative Z no longer overflows
    # (the naive form emits a RuntimeWarning there).
    A = np.exp(-np.logaddexp(0, -Z))
    cache = Z

    return (A, cache)

## Weights Initialization

### TODO: add different initializations
Candidates: He, Xavier, random, zeros?

https://datascience-enthusiast.com/DL/Improving-DeepNeural-Networks-Initialization.html

In [10]:
def initialize_params(layer_dims):
    """
    Initializes the weights for the (deep) neural network layers using Xavier's
    (Glorot) uniform initialization; biases start at zero.
        Parameters:
            layer_dims (list): list of layers' number of nodes (including input layer)
        Returns:
            params (dict): dictionary containing weights and bias per layer
                "Wn": <numpy.ndarray> weights for layer n, shape (layer_dims[n], layer_dims[n-1])
                "bn": <numpy.ndarray> bias for layer n, shape (layer_dims[n], 1)
    """
    params = {}

    for layer in range(1, len(layer_dims)):
        fan_out = layer_dims[layer]
        fan_in = layer_dims[layer - 1]

        # Xavier uniform draws from U(-limit, limit) with
        # limit = sqrt(6 / (fan_in + fan_out)); the range must be symmetric
        # around zero, otherwise every weight starts out non-negative.
        limit = np.sqrt(6 / (fan_in + fan_out))
        params[f"W{layer}"] = np.random.uniform(-limit, limit,
                                                size=(fan_out, fan_in))

        params[f"b{layer}"] = np.zeros((fan_out, 1))

    return params

## Forward Propagation

In [55]:
def forward_propagate_layer(A_prev, W, b, activate_func):
    """
    Runs one layer forward: the linear step W @ A_prev + b followed by the
    activation function.
        Parameters:
            A_prev (<numpy.ndarray>): this layer's input (previous layer's output)
            W (<numpy.ndarray>): weights of this layer
            b (<numpy.ndarray>): bias of this layer
            activate_func (<function>): activation function; called with the
                linear output and must return an (A, activation_cache) pair
        Returns:
            A (<numpy.ndarray>): layer output (post-activation)
            cache (tuple): forward propagation caches for backward
                (linear_cache, activation_cache), where linear_cache is
                (A_prev, W, b)
    """
    # Linear part of the layer.
    pre_activation = W @ A_prev + b

    # Non-linear part; the activation reports what it wants remembered.
    A, activation_cache = activate_func(pre_activation)

    return A, ((A_prev, W, b), activation_cache)

In [67]:
def forward_propagate(X, params):
    """
    Applies forward propagation through every layer of the network.
        Parameters:
            X (<numpy.ndarray>): network input (fed to layer 1)
            params (dict): dictionary containing weights and bias per layer
                "Wn": <numpy.ndarray> weights for layer n
                "bn": <numpy.ndarray> bias for layer n
                "An": <function> activation function for layer n (optional);
                    when absent, relu is used for every layer except the
                    last one, which uses sigmoid
        Returns:
            A (<numpy.ndarray>): output of the last layer (post-activation)
            caches (list): one cache per layer, in layer order, as returned
                by forward_propagate_layer: (linear_cache, activation_cache)
    """
    caches = []
    A = X

    # Count layers by their weight matrices rather than len(params) // 3:
    # a dict holding only "Wn"/"bn" entries (as initialize_params builds it)
    # would otherwise be undercounted.
    nlayers = sum(1 for key in params if str(key).startswith("W"))

    for l in range(1, nlayers + 1):
        # An explicit per-layer activation wins; otherwise fall back to the
        # relu-hidden / sigmoid-output default instead of raising KeyError.
        activate_func = params.get(f"A{l}")
        if activate_func is None:
            activate_func = sigmoid if l == nlayers else relu

        A, cache = forward_propagate_layer(A,
                                           params[f"W{l}"],
                                           params[f"b{l}"],
                                           activate_func)
        caches.append(cache)

    return (A, caches)