In [58]:
import numpy as np

In [106]:
# Initialize weights
def initialize_weights(layer_dimensions, seed=42):
    """Create the weight matrices and bias vectors of a fully connected network.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(2 / fan_in)`` (He-style scaling); biases start at zero.

    Parameters
    ----------
    layer_dimensions : sequence of int
        Number of units per layer, input layer first.
    seed : int or None, optional
        Value handed to ``np.random.seed`` before sampling. The default (42)
        reproduces the previously hard-coded behaviour. Pass ``None`` to
        leave the global NumPy random state untouched.

    Returns
    -------
    dict
        ``"W{l}"`` with shape ``(layer_dimensions[l], layer_dimensions[l - 1])``
        and ``"b{l}"`` with shape ``(layer_dimensions[l], 1)`` for every layer
        ``l`` from 1 up to ``len(layer_dimensions) - 1``.
    """
    if seed is not None:
        # Reseeds NumPy's *global* generator, exactly as the original code did.
        np.random.seed(seed)

    parameters = {}
    # Pair each layer width with the width of the layer feeding into it.
    fan_pairs = zip(layer_dimensions[:-1], layer_dimensions[1:])
    for layer, (fan_in, fan_out) in enumerate(fan_pairs, start=1):
        scale = np.sqrt(2 / fan_in)
        parameters[f"W{layer}"] = np.random.randn(fan_out, fan_in) * scale
        parameters[f"b{layer}"] = np.zeros((fan_out, 1))
    return parameters

In [107]:
# Activation functions

def elu(x, alpha=1):
    """Exponential linear unit, applied element-wise.

    Returns ``x`` where ``x > 0`` and ``alpha * (exp(x) - 1)`` elsewhere.

    Parameters
    ----------
    x : numpy.ndarray
        Pre-activation values.
    alpha : float, optional
        Scale of the negative branch. Defaults to 1.

    Returns
    -------
    numpy.ndarray
        Activations with the same shape as ``x``.
    """
    # np.where evaluates both branches for every element, so the exponential
    # is only fed the non-positive part of x: large positive inputs would
    # otherwise overflow to inf even though that branch is discarded.
    # expm1 also keeps precision for negative values very close to zero.
    negative_branch = alpha * np.expm1(np.minimum(x, 0))
    return np.where(x > 0, x, negative_branch)

def softmax(x):
    """Column-wise softmax.

    Normalises along axis 0, so every column of the result sums to 1.

    Parameters
    ----------
    x : numpy.ndarray
        Scores (logits); each column is normalised independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.
    """
    if np.size(x) == 0:
        # Nothing to normalise; np.max would reject an empty reduction axis.
        return np.exp(x)

    # Softmax is invariant to subtracting a per-column constant. Shifting by
    # the column maximum keeps every exponent <= 0, so np.exp cannot overflow
    # and turn the result into nan.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)


In [129]:
# cross-entropy Loss function
def loss(y_hat, y, eps=1e-12):
    """Cross-entropy loss for a single sample.

    Computes ``-sum(y * log(y_hat))`` over one column vector.

    Parameters
    ----------
    y_hat : numpy.ndarray
        Predicted values, shape ``(n, 1)``.
    y : numpy.ndarray
        Target values, shape ``(n, 1)``.
    eps : float, optional
        Lower bound applied to ``y_hat`` before taking the logarithm.

    Returns
    -------
    numpy.floating
        The summed cross-entropy.

    Raises
    ------
    AssertionError
        If the shapes differ or there is more than one column.
    """
    assert y.shape[0] == y_hat.shape[0]
    assert y.shape[1] == y_hat.shape[1]
    assert y.shape[1] == 1

    # A predicted probability of exactly 0 would give log(0) = -inf, and
    # 0 * -inf = nan for every non-target class. Flooring at eps keeps the
    # loss finite; values already above eps are left untouched.
    safe_y_hat = np.maximum(y_hat, eps)
    return -np.sum(y * np.log(safe_y_hat))

# Sanity check: the true class (row 0) is predicted with probability 0.1,
# so the loss is -ln(0.1), roughly 2.30, which must exceed 1.
y_temp = np.array([1, 0, 0]).reshape(3, 1)
y_hat_temp = np.array([0.1, 0.1, 0.8]).reshape(3, 1)
assert loss(y_hat_temp, y_temp) > 1

In [108]:
# Forward functions

def linear_and_activation_forward(prev_A, W, b):
    """Run one hidden layer: affine transform followed by ELU.

    Parameters
    ----------
    prev_A : numpy.ndarray
        Activations of the previous layer; must have exactly one column.
    W : numpy.ndarray
        Weight matrix whose column count equals the row count of ``prev_A``.
    b : numpy.ndarray
        Bias column vector with as many rows as ``W``.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A = elu(W . prev_A + b)`` and ``cache`` is
        ``((prev_A, W, b), Z)`` with ``Z`` the pre-activation values.

    Raises
    ------
    AssertionError
        If any of the shape requirements above is violated.
    """
    # Shape guards: single sample, and W / b compatible with the input.
    assert prev_A.shape[1] == 1
    assert W.shape[1] == prev_A.shape[0]
    assert b.shape[1] == 1
    assert W.shape[0] == b.shape[0]

    pre_activation = W.dot(prev_A) + b
    linear_inputs = (prev_A, W, b)
    return elu(pre_activation), (linear_inputs, pre_activation)

def linear_and_activation_output(prev_A, W, b):
    """Run the output layer: affine transform followed by softmax.

    Parameters
    ----------
    prev_A : numpy.ndarray
        Activations of the last hidden layer (or the raw input).
    W : numpy.ndarray
        Weight matrix whose column count equals the row count of ``prev_A``.
    b : numpy.ndarray
        Bias column vector with as many rows as ``W``.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A = softmax(W . prev_A + b)`` and ``cache`` is
        ``((prev_A, W, b), Z)`` with ``Z`` the pre-activation values.

    Raises
    ------
    AssertionError
        If ``W``, ``b`` and ``prev_A`` have incompatible shapes.
    """
    # Same compatibility checks as the hidden-layer helper. Without them a
    # mis-shaped bias would be broadcast silently and produce an output of the
    # wrong shape instead of failing. The number of columns of prev_A is
    # deliberately not restricted here, matching the previous behaviour.
    assert W.shape[1] == prev_A.shape[0]
    assert b.shape[1] == 1
    assert W.shape[0] == b.shape[0]

    Z = np.dot(W, prev_A) + b
    A = softmax(Z)
    cache = ((prev_A, W, b), Z)
    return A, cache

def feed_forward(X, parameters):
    """Propagate an input through every layer of the network.

    All layers except the last go through ``linear_and_activation_forward``;
    the last one goes through ``linear_and_activation_output``.

    Parameters
    ----------
    X : numpy.ndarray
        Network input.
    parameters : dict
        Holds ``"W{l}"`` and ``"b{l}"`` for each layer ``l`` starting at 1.
        The layer count is taken as half the number of entries.

    Returns
    -------
    tuple
        ``(output, caches)``: the activation of the last layer and the list
        of per-layer caches in layer order.
    """
    n_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers: 1 .. n_layers - 1.
    for idx in range(1, n_layers):
        activation, layer_cache = linear_and_activation_forward(
            activation, parameters[f"W{idx}"], parameters[f"b{idx}"]
        )
        caches.append(layer_cache)

    # Output layer.
    activation, layer_cache = linear_and_activation_output(
        activation, parameters[f"W{n_layers}"], parameters[f"b{n_layers}"]
    )
    caches.append(layer_cache)

    return activation, caches
    

In [109]:
# Network configuration: input width, hidden layers (count and width), output width.

hl_count = 5  # number of hidden layers
n_x = 5       # input features
n_h = 10      # units per hidden layer
n_y = 3       # output classes

layer_dimensions = [n_x] + [n_h] * hl_count + [n_y]

# Single input sample as a column vector holding 1..n_x.
X = np.arange(1, n_x + 1).reshape(n_x, 1)

parameters = initialize_weights(layer_dimensions)

In [131]:
# Forward pass over the sample input; `caches` keeps the per-layer
# intermediate values returned alongside the prediction.

output, caches = feed_forward(X=X, parameters=parameters)
print("Output of feed forward is: \n", output)

Output of feed forward is: 
 [[0.01158557]
 [0.40535973]
 [0.5830547 ]]
