In [42]:
import numpy as np
import warnings
import pandas as pd
from typing import Tuple

In [48]:
def initialize_parameters(layer_dims : np.ndarray) -> dict:
    """
    Create the initial weights and biases for every layer of the network.

    Input:
    layer_dims - number of units in each layer, input layer first. May be a numpy
                 array or any plain sequence of ints (list / tuple).

    Output:
    parameters - a dictionary holding, for each layer i >= 1:
                 "W_i" - array of shape (layer_dims[i], layer_dims[i-1]), drawn from a
                         standard normal and scaled by sqrt(2 / layer_dims[i-1])
                 "b_i" - 1-D array of layer_dims[i] zeros
    """
    # np.asarray makes plain lists / tuples acceptable, not only numpy arrays
    dims = np.asarray(layer_dims).ravel()
    parameters = {}

    for i in range(1, dims.size):
        fan_in, fan_out = dims[i - 1], dims[i]
        # He initialization: the sqrt(2 / fan_in) factor scales the weights by the
        # number of inputs feeding each unit
        parameters[f"W_{i}"] = np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in)
        parameters[f"b_{i}"] = np.zeros(fan_out)

    return parameters

In [49]:
def linear_forward(A : np.ndarray, W : np.ndarray, b : np.ndarray)->Tuple[np.ndarray, dict]:
    """
    Linear part of a layer's forward step: Z = W . A + b.

    Input:
    A - activations feeding the layer: a single vector, or a matrix with one column
        per example
    W - weight matrix of the layer
    b - bias of the layer. A 1-D bias with one entry per row of W . A is added to
        every column (one bias per unit); any other shape is added with the usual
        numpy broadcasting.

    Output:
    Z - the result of W . A + b
    linear_cach - dictionary holding the three inputs exactly as they were received,
                  under the keys "Activation", "Weights" and "Bias"
    """
    WA = np.dot(W, A)

    bias = b
    # A 1-D bias would otherwise be broadcast along the LAST axis of a 2-D W.A, which
    # either raises or (when batch size == number of units) silently adds the bias
    # per example instead of per unit. Turn it into a column so it is added row-wise.
    if np.ndim(WA) == 2 and np.ndim(b) == 1 and np.shape(b)[0] == np.shape(WA)[0]:
        bias = np.reshape(b, (-1, 1))

    Z = WA + bias

    # the cache keeps the caller's own objects (b is stored un-reshaped)
    linear_cach = {"Activation" : A,
                 "Weights" : W,
                 "Bias" : b
                }
    return Z, linear_cach

In [47]:
def Softmax(Z : np.ndarray)->Tuple[np.ndarray, np.ndarray]:
    """
    Softmax activation, normalised along axis 0.

    Input:
    Z - pre-activation values. For a 2-D Z every column is normalised on its own, so
        each column of the result sums to 1; a 1-D Z is normalised as one vector.

    Output:
    A - exp(Z) divided by the sum of exp(Z) along axis 0
    activation_cache - the Z that was passed in, unchanged
    """
    Z_arr = np.asarray(Z)
    # a 0-d input has no axis 0; reduce over everything in that case
    axis = 0 if Z_arr.ndim > 0 else None

    # Subtracting the per-column maximum leaves the result mathematically unchanged
    # but keeps exp() from overflowing to inf (and the ratio from becoming nan).
    shifted = Z_arr - np.max(Z_arr, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)

    # The denominator is taken per column, not over the whole array, so examples in
    # a batch do not share one normaliser. It is always >= 1 after the shift, so no
    # epsilon is needed.
    A = exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)
    activation_cache = Z

    return A, activation_cache

In [46]:
def ReLu(Z : np.ndarray)->Tuple[np.ndarray, dict]:
    """
    Rectified linear unit activation.

    Input:
    Z - pre-activation values of the layer

    Output:
    A - element-wise max(0, Z), i.e. Z with its negative entries replaced by zero
    activation_cache - the Z that was passed in, unchanged
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

In [45]:
def linear_activation_forward(A_prev : np.ndarray, W : np.ndarray, b : np.ndarray, activation : str) -> Tuple[np.ndarray , dict]:
    """
    Forward step of one full layer: the linear part followed by its activation.

    Input:
    A_prev - activations of the previous layer
    W - weight matrix of the current layer
    b - bias of the current layer
    activation - which activation to apply: "softmax" or "relu"

    Output:
    A - the activations of the current layer
    cach - the entries of the cache returned by linear_forward, plus the activation
           function's cache stored under the key "Layer Activation"

    Raises:
    ValueError - if activation is neither "softmax" nor "relu" (raised after the
                 linear step has already been computed)
    """
    pre_activation, layer_cache = linear_forward(A_prev, W, b)

    if activation == "relu":
        A, activation_cache = ReLu(pre_activation)
    elif activation == "softmax":
        A, activation_cache = Softmax(pre_activation)
    else:
        raise ValueError("The Activation code is not recognizable")

    # build a new dict so the one returned by linear_forward is left untouched
    merged_cache = dict(layer_cache)
    merged_cache["Layer Activation"] = activation_cache

    return A, merged_cache

In [43]:
def L_model_forward(X : np.ndarray, parameters : dict, use_batchnorm : bool) -> Tuple[np.ndarray, dict]:
    """
    Forward pass through the whole network: every layer but the last uses ReLU, the
    last layer uses softmax.

    Function input:
    X - the data. It is transposed on entry, so it is consumed as
        (number of examples, input size); a 1-D X is treated as a single example.
    parameters - the W and b parameters of each layer - a dictionary with W_i and b_i
                 as titles, for i = 1..L where L = len(parameters) // 2
    use_batchnorm - a boolean flag; when True the output of every hidden (ReLU) layer
                    is passed through apply_batchnorm before it feeds the next layer.
                    The softmax output is never normalised, so it stays a
                    probability distribution.

    Function output:
    AL - the last post-activation value, shape (units in last layer, number of examples)
    caches - a dictionary of the per-layer caches returned by linear_activation_forward:
             hidden layer j is stored under "Layer_j", the output layer under "AL[L]"
    """
    X = X.T
    # For the case the number of examples is one
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    num_examples = np.shape(X)[1]  # examples are the columns from here on
    caches = {}
    L = len(parameters) // 2
    A_prev = X

    for j in range(1, L):
        W = parameters[f'W_{j}']
        b = parameters[f'b_{j}']
        # repeat the 1-D bias once per example -> (units, number of examples)
        bias = np.array([b] * num_examples).T
        A_prev, cach = linear_activation_forward(A_prev, W, bias, 'relu')
        if use_batchnorm:
            # apply_batchnorm takes its statistics along axis 0, while examples are
            # the columns here; transpose so each unit is normalised over the batch.
            A_prev = apply_batchnorm(A_prev.T).T
        caches[f'Layer_{j}'] = cach

    W = parameters[f'W_{L}']
    b = parameters[f'b_{L}']
    bias = np.array([b] * num_examples).T
    AL, cach = linear_activation_forward(A_prev, W, bias, 'softmax')
    caches[f'AL[{L}]'] = cach

    return AL, caches

In [38]:
def compute_cost(AL : np.ndarray,Y : np.ndarray)-> float:
    """
    Categorical cross-entropy cost, averaged over the examples.

    Input:
    AL - probability vector corresponding to your label predictions, shape
         (num_of_classes, number of examples); a 1-D AL is treated as one example
    Y - the one-hot ground truth, either (number of examples, num_of_classes) or
        (num_of_classes, number of examples). When both readings fit (square case)
        the rows are taken as examples. A 1-D Y is one one-hot row.

    Output:
    cost - the cross-entropy cost: -(1/m) * sum_i sum_c Y[i, c] * log(AL[c, i])

    Raises:
    ValueError - if AL is not 1-D / 2-D or the shape of Y fits neither layout
    """
    probs = np.asarray(AL)
    labels = np.asarray(Y)

    if probs.ndim == 1:
        probs = probs.reshape(-1, 1)
    if probs.ndim != 2:
        raise ValueError(f"AL must be 1-D or 2-D, got shape {probs.shape}")
    num_classes, num_examples = probs.shape

    if labels.ndim == 1:
        labels = labels.reshape(1, -1)

    # bring the labels to (number of examples, num_of_classes)
    if labels.shape == (num_examples, num_classes):
        labels_by_example = labels
    elif labels.shape == (num_classes, num_examples):
        labels_by_example = labels.T
    else:
        raise ValueError(
            f"Y with shape {np.shape(Y)} does not match AL with shape {np.shape(AL)}"
        )

    # log(0) is -inf and 0 * -inf is nan, so keep the probabilities off zero
    epsilon = 1e-15
    log_probs = np.log(np.maximum(probs, epsilon))

    # Element-wise product pairs every example only with its own prediction; a matrix
    # product would also add the terms that mix example i with prediction j.
    cost = -np.sum(labels_by_example * log_probs.T)
    return cost / num_examples

In [77]:
def apply_batchnorm(A : np.ndarray, axis : int = 0, epsilon : float = 1e-5)-> np.ndarray:
    """
    Description:
    performs batchnorm on the received activation values of a given layer:
    NA = (A - mean) / sqrt(variance + epsilon)

    Input:
    A - the activation values of a given layer
    axis - the axis along which the mean and variance are taken (default 0, i.e. the
           statistics are computed down each column). Pass the axis that indexes the
           examples of the batch.
    epsilon - small constant added to the variance so the division is defined when
              the variance is zero (default 1e-5)

    output:
    NA - the normalized activation values, same shape as A
    """
    # keepdims keeps the reduced axis as length 1, so the statistics broadcast back
    # against A whichever axis was chosen
    mu = np.mean(A, axis=axis, keepdims=True)
    var = np.var(A, axis=axis, keepdims=True)
    A_normalized = (A - mu) / np.sqrt(var + epsilon)

    return A_normalized