In [1]:
import numpy as np
import warnings
import pandas as pd

In [2]:
def initialize_parameters(layer_dims):
    """Create the initial weights and biases for every layer of the network.

    Parameters
    ----------
    layer_dims : sequence or 1-D array of int
        Layer sizes, starting with the input layer (index 0).

    Returns
    -------
    dict
        For each layer ``i`` in ``1 .. len(layer_dims) - 1``:
        ``"W_i"`` is an ndarray of shape ``(layer_dims[i], layer_dims[i-1])``
        drawn from a standard normal and scaled by ``sqrt(2 / layer_dims[i-1])``;
        ``"b_i"`` is a zero column vector of shape ``(layer_dims[i], 1)``.
    """
    # Accept plain lists/tuples as well as numpy arrays.
    dims = [int(d) for d in np.ravel(layer_dims)]

    parameters = {}     # This will hold the parameters
    for i in range(1, len(dims)):
        fan_in, fan_out = dims[i - 1], dims[i]
        # Random weights, scaled by sqrt(2 / fan_in) to keep activations well-sized.
        parameters[f"W_{i}"] = np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in)
        # Column vector so the bias broadcasts/tiles over the example axis.
        parameters[f"b_{i}"] = np.zeros((fan_out, 1))

    return parameters

In [3]:
# This function gets as input activation vector A, weight matrix W and bias vector b for each layer
# The output will be the vector Z and a dictionary that saves the input parameters

def linear_forward(A, W, b):
    """Compute the linear part of a layer, ``Z = W . A + b``.

    Returns a tuple ``(Z, cache)`` where ``Z`` is returned as a fresh
    ``np.array`` and ``cache`` is a dictionary holding the three inputs under
    the keys ``"Activation"``, ``"Weights"`` and ``"Bias"``.
    """
    # Remember the inputs exactly as they were passed in.
    inputs_cache = dict(Activation=A, Weights=W, Bias=b)

    weighted_sum = np.dot(W, A)
    pre_activation = weighted_sum + b

    return np.array(pre_activation), inputs_cache

In [4]:
# This function gets as an input the Z vector
# The output is the softmax activation of Z, together with Z itself (kept as the activation cache)

def Softmax(Z):
    """Softmax activation, normalised separately for every example (column).

    Parameters
    ----------
    Z : array-like
        Pre-activation values. For a 2-D input the first axis is treated as
        the class axis, so every column is normalised on its own; a 1-D input
        is normalised as a single vector.

    Returns
    -------
    A : np.ndarray
        Softmax of ``Z``; the entries along axis 0 sum to 1.
    activation_cache
        The ``Z`` object that was passed in, unchanged.
    """
    logits = np.asarray(Z, dtype=float)
    activation_cache = Z

    # Nothing to normalise; also avoids np.max failing on an empty array.
    if logits.size == 0:
        return np.exp(logits), activation_cache

    # 0-d input has no axis 0, so reduce over everything in that case.
    axis = 0 if logits.ndim else None

    # Subtracting the maximum leaves the result unchanged but prevents
    # overflow in exp() for large values.
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)
    A = exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)

    return A, activation_cache

In [5]:
# This function gets as an input the Z vector
# The output is the ReLU activation of Z, together with Z itself (kept as the activation cache)

def ReLu(Z):
    """Rectified linear unit: element-wise ``max(0, Z)``.

    Returns a tuple ``(A, activation_cache)`` where ``A`` holds the rectified
    values and ``activation_cache`` is the ``Z`` that was passed in.
    """
    activation_cache = Z
    # Negative entries are clamped to zero; everything else passes through.
    rectified = np.maximum(0, Z)
    return rectified, activation_cache

In [6]:
# This function's inputs are the previous layer's activation, the layer's weight matrix and bias, and the name of the activation function
# The output is the activation vector and a cache dictionary holding the linear inputs and the pre-activation Z

def linear_activation_forward(A_prev, W, b, activation):
    """Run one layer: the linear step followed by the chosen activation.

    ``activation`` must be ``"softmax"`` or ``"relu"``; any other value raises
    ``ValueError``. Returns ``(A, cache)`` where ``cache`` is the dictionary
    from ``linear_forward`` extended with a ``"Layer Activation"`` entry that
    holds the activation cache returned by the activation function.
    """
    Z, linear_part = linear_forward(A_prev, W, b)

    if activation == "relu":
        A, pre_activation = ReLu(Z)
    elif activation == "softmax":
        A, pre_activation = Softmax(Z)
    else:
        raise ValueError("The Activation code is not recognizable")

    # Merge the linear inputs and the activation cache into a single record.
    layer_cache = {**linear_part, "Layer Activation": pre_activation}
    return A, layer_cache

In [11]:
# Function input:
# X - the data, numpy array of shape (input size, number of examples)
# parameters – the initialized W and b parameters of each layer - a dictionary with W_i and b_i as titles
# use_batchnorm - a boolean flag used to determine whether to apply batchnorm after the activation 

# Function output:
# AL – the last post-activation value
# caches – a dictionary of the cache objects produced for each layer (keys 'Layer_i' for the hidden layers and 'AL' for the output layer)

def L_model_forward(X, parameters, use_batchnorm):
    """Forward pass through the whole network.

    Layers ``1 .. L-1`` use ReLU and layer ``L`` uses softmax, where ``L`` is
    the number of ``W_i`` / ``b_i`` pairs in ``parameters``.

    Parameters
    ----------
    X : array of shape (input size, number of examples)
        The input data.
    parameters : dict
        Holds ``"W_i"`` and ``"b_i"`` for every layer ``i = 1 .. L``.
    use_batchnorm : bool
        Accepted for interface compatibility.
        NOTE(review): batch normalization is not applied in this function yet;
        the flag currently has no effect. TODO: wire it in.

    Returns
    -------
    AL : np.ndarray
        Output of the final (softmax) layer.
    caches : dict
        ``"Layer_i"`` for each hidden layer and ``"AL"`` for the output layer,
        each mapped to the cache returned by ``linear_activation_forward``.

    Raises
    ------
    ValueError
        If ``parameters`` does not contain at least one layer.
    """
    number_examples = np.shape(X)[1]

    num_layers = len(parameters) // 2
    if num_layers < 1:
        raise ValueError("parameters must contain at least one W_i / b_i pair")

    def bias_matrix(b):
        # Force a column vector first so both (n,) and (n, 1) biases tile to
        # an (n, number_examples) matrix.
        return np.tile(np.reshape(b, (-1, 1)), (1, number_examples))

    A_prev = X
    caches = {}

    # Hidden layers: linear step + ReLU.
    for i in range(1, num_layers):
        W = parameters[f'W_{i}']
        bias = bias_matrix(parameters[f'b_{i}'])
        A_prev, cach = linear_activation_forward(A_prev, W, bias, 'relu')
        caches.update({f'Layer_{i}': cach})

    # Output layer: must use its OWN parameters (W_L, b_L), not the ones left
    # over from the last hidden layer.
    W = parameters[f'W_{num_layers}']
    bias = bias_matrix(parameters[f'b_{num_layers}'])
    AL, cach = linear_activation_forward(A_prev, W, bias, 'softmax')
    caches.update({'AL': cach})

    return AL, caches

In [8]:
# Input:
# AL – probability vector corresponding to your label predictions, shape (num_of_classes, number of examples)
# Y – the labels vector (i.e. the ground truth)

# Output:
# cost – the cross-entropy cost

def compute_cost(AL,Y):
    """Categorical cross-entropy cost averaged over the examples.

    Computes ``-(1/m) * sum(Y * log(AL))`` where ``m`` is the number of
    examples (columns of ``AL``).

    Parameters
    ----------
    AL : array of shape (num_of_classes, number of examples)
        Predicted probabilities.
    Y : array with the same shape as ``AL``
        Ground-truth labels. Assumed to be one-hot encoded per column —
        TODO confirm against the caller.

    Returns
    -------
    float
        The cross-entropy cost.

    Raises
    ------
    ValueError
        If ``Y`` and ``AL`` do not have the same shape.
    """
    AL = np.asarray(AL, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if Y.shape != AL.shape:
        raise ValueError(
            f"Y has shape {Y.shape} but AL has shape {AL.shape}; they must match"
        )

    num_examples = np.shape(AL)[1]

    # Clip from below so a zero probability yields a large finite penalty
    # instead of -inf (and 0 * -inf = nan for the non-target classes).
    log_probs = np.log(np.clip(AL, 1e-12, None))

    return -np.sum(Y * log_probs) / num_examples

In [9]:
# Description:
# performs batchnorm on the received activation values of a given layer.

# Input:
# A - the activation values of a given layer

# output:
# NA - the normalized activation values, based on the formula learned in class

def apply_batchnorm(A, epsilon=1e-5):
    """Normalise activations to zero mean and unit variance over the batch.

    Parameters
    ----------
    A : array-like
        Activation values of a layer. Statistics are taken along the last
        axis, which is assumed to be the example axis (layout
        ``(layer size, number of examples)``) — TODO confirm against the
        formula used in class. A 1-D input is normalised as a whole.
    epsilon : float, optional
        Small constant added to the variance to avoid division by zero.

    Returns
    -------
    np.ndarray
        ``(A - mean) / sqrt(var + epsilon)``, same shape as ``A``.
    """
    A = np.asarray(A, dtype=float)

    # keepdims so the per-unit statistics broadcast back over the examples.
    mu = np.mean(A, axis=-1, keepdims=True)
    var = np.var(A, axis=-1, keepdims=True)

    NA = (A - mu) / np.sqrt(var + epsilon)
    return NA


In [12]:
# NOTE(review): this cell only prints a fixed message and defines nothing;
# it looks like a leftover scratch cell — confirm whether it can be removed.
print("This is a change")

This is a change
