# Some utility functions

In [137]:
import numpy as np
import matplotlib.pyplot as plt
import math

In [138]:
def initialize_parameters(layers_dims):
    """Create the initial weights and biases for every layer.

    Args:
        layers_dims: sequence of layer widths, input layer first.

    Returns:
        dict mapping "W<l>" to a (layers_dims[l], layers_dims[l - 1]) array of
        small Gaussian values (standard normal scaled by 0.01) and "b<l>" to a
        (layers_dims[l], 1) array of zeros, for l = 1 .. len(layers_dims) - 1.

    Note:
        Re-seeds NumPy's global RNG with 1 on every call, so the result is
        deterministic for a given ``layers_dims``.
    """
    np.random.seed(1)
    parameters = {}

    # Walk consecutive (fan_in, fan_out) pairs; layer numbering starts at 1.
    width_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for layer, (fan_in, fan_out) in enumerate(width_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in) * 0.01
        biases = np.zeros((fan_out, 1))

        assert weights.shape == (fan_out, fan_in)
        assert biases.shape == (fan_out, 1)

        parameters[f"W{layer}"] = weights
        parameters[f"b{layer}"] = biases

    return parameters

In [139]:
def sigmoid(Z):
    """Element-wise logistic sigmoid, computed without overflow.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        (A, Z): the activations ``1 / (1 + exp(-Z))`` and the untouched input,
        which callers keep as the activation cache for back-propagation.
    """
    # exp(-|Z|) always lies in (0, 1], so it can never overflow, whereas the
    # naive exp(-Z) overflows for large negative Z.
    decay = np.exp(-np.abs(Z))
    # For Z >= 0: 1 / (1 + exp(-Z)); for Z < 0 the algebraically identical
    # exp(Z) / (1 + exp(Z)).
    numerator = np.where(np.asarray(Z) >= 0, np.ones_like(decay), decay)
    A = numerator / (1 + decay)
    return A, Z


In [224]:
# Define helper functions that will be used in L-model forward prop
def linear_forward(A_prev, W, b):
    """Affine step of a layer: ``Z = W . A_prev + b``.

    Args:
        A_prev: activations feeding this layer.
        W: weight matrix of this layer.
        b: bias of this layer, added to the product by broadcasting.

    Returns:
        (Z, cache) where ``cache`` is the tuple ``(A_prev, W, b)`` kept for the
        backward pass.
    """
    pre_activation = np.dot(W, A_prev) + b
    return pre_activation, (A_prev, W, b)


def linear_activation_forward(A_prev, W, b, activation_fn):
    """Run one layer forward: affine step followed by its activation.

    Args:
        A_prev: activations feeding this layer.
        W: weight matrix of this layer.
        b: bias of this layer.
        activation_fn: ``"sigmoid"`` applies the sigmoid; any other value
            leaves the affine output unchanged (identity activation).

    Returns:
        (A, cache) where ``cache`` is ``(linear_cache, activation_cache)``;
        ``activation_cache`` is the pre-activation ``Z`` in both cases.
    """
    # The affine step is the same whichever activation follows.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation_fn == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        A = activation_cache = Z

    assert A.shape == (W.shape[0], A_prev.shape[1])

    return A, (linear_cache, activation_cache)


def L_model_forward(X, parameters, hidden_layers_activation_fn="sigmoid"):
    """Forward pass through the whole network with a linear output layer.

    Args:
        X: input batch; its second axis indexes examples.
        parameters: dict holding "W<l>" / "b<l>" for l = 1 .. L.
        hidden_layers_activation_fn: activation name handed to
            ``linear_activation_forward`` for layers 1 .. L - 1.

    Returns:
        (AL, caches): the network output and a list with one
        ``(linear_cache, Z)`` entry per layer, in layer order.
    """
    A = X
    caches = []
    L = len(parameters) // 2        # each layer contributes one W and one b

    # Hidden layers 1 .. L - 1 use the requested activation.
    for l in range(1, L):
        A, cache = linear_activation_forward(
            A, parameters["W" + str(l)], parameters["b" + str(l)],
            activation_fn=hidden_layers_activation_fn)
        caches.append(cache)

    # The output layer is purely linear, so the activation equals Z and Z also
    # serves as the activation cache.
    W_out = parameters["W" + str(L)]
    Z, linear_cache = linear_forward(A, W_out, parameters["b" + str(L)])
    AL = Z
    caches.append((linear_cache, Z))

    # Check against the output layer's own width rather than assuming exactly
    # one output unit.
    assert AL.shape == (W_out.shape[0], X.shape[1])
    return AL, caches


In [225]:
# Compute the half mean-squared-error cost
def compute_cost(AL, y):
    """Return ``0.5 * sum((AL - y) ** 2) / m``.

    Args:
        AL: network predictions.
        y: targets; a 2-D array whose second axis has length ``m``.

    Returns:
        The summed squared error, halved and divided by ``m``.
    """
    n_examples = y.shape[1]
    residual = AL - y
    return 0.5 * np.sum(np.square(residual)) / n_examples

In [226]:

def sigmoid_gradient(dA, Z):
    """Back-propagate a gradient through the sigmoid.

    Args:
        dA: gradient with respect to the sigmoid output.
        Z: the pre-activation that was fed to the sigmoid.

    Returns:
        ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``, i.e. the gradient with
        respect to ``Z``.
    """
    # Recompute the activation from the cached pre-activation.
    activation, _ = sigmoid(Z)
    return dA * activation * (1 - activation)

# define helper functions that will be used in L-model back-prop
def linear_backword(dZ, cache):
    """Backward pass of the affine step ``Z = W . A_prev + b``.

    Args:
        dZ: gradient with respect to the layer's pre-activation.
        cache: the ``(A_prev, W, b)`` tuple stored by the forward pass.

    Returns:
        (dA_prev, dW, db). ``dW`` and ``db`` are scaled by ``1 / m`` where
        ``m = A_prev.shape[1]``; ``dA_prev`` is not.
    """
    A_prev, W, b = cache
    inv_m = 1 / A_prev.shape[1]

    grad_W = inv_m * np.dot(dZ, A_prev.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(W.T, dZ)

    # Every gradient must have the shape of the quantity it belongs to.
    assert grad_A_prev.shape == A_prev.shape
    assert grad_W.shape == W.shape
    assert grad_b.shape == b.shape

    return grad_A_prev, grad_W, grad_b


def linear_activation_backward(dA, cache, activation_fn):
    """Backward pass of one layer (activation, then affine step).

    Args:
        dA: gradient with respect to this layer's output.
        cache: ``(linear_cache, activation_cache)`` from the forward pass.
        activation_fn: ``"sigmoid"`` back-propagates through the sigmoid; any
            other value treats the activation as the identity.

    Returns:
        (dA_prev, dW, db) as produced by ``linear_backword``.
    """
    linear_cache, activation_cache = cache

    # With an identity activation the gradient passes through unchanged.
    if activation_fn == "sigmoid":
        dZ = sigmoid_gradient(dA, activation_cache)
    else:
        dZ = dA

    dA_prev, dW, db = linear_backword(dZ, linear_cache)
    return dA_prev, dW, db


def L_model_backward(AL, y, caches, hidden_layers_activation_fn="sigmoid"):
    """Back-propagate the half-MSE cost through the whole network.

    Args:
        AL: network output from the forward pass.
        y: targets; reshaped to ``AL.shape`` before use.
        caches: per-layer caches returned by ``L_model_forward``.
        hidden_layers_activation_fn: activation name used for layers
            1 .. L - 1 in the forward pass.

    Returns:
        dict with "dW<l>" and "db<l>" for l = 1 .. L and "dA<l>" for
        l = 0 .. L - 1.
    """
    y = y.reshape(AL.shape)
    L = len(caches)
    grads = {}

    # Per-example derivative of 0.5 * (AL - y) ** 2. It must NOT be divided by
    # the number of examples here: linear_backword already applies the 1 / m
    # averaging to dW and db, so dividing again would scale every gradient by
    # 1 / m**2 and no longer match compute_cost.
    dAL = AL - y

    # Output layer is linear, hence the non-"sigmoid" activation name.
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads[
        "db" + str(L)] = linear_activation_backward(
            dAL, caches[L - 1], "")

    # Hidden layers, last to first; each consumes the dA produced by the layer
    # above it.
    for l in range(L - 1, 0, -1):
        current_cache = caches[l - 1]
        grads["dA" + str(l - 1)], grads["dW" + str(l)], grads[
            "db" + str(l)] = linear_activation_backward(
                grads["dA" + str(l)], current_cache,
                hidden_layers_activation_fn)

    return grads


In [227]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: dict holding "W<l>" / "b<l>" for l = 1 .. L.
        grads: dict holding the matching "dW<l>" / "db<l>" gradients.
        learning_rate: step size.

    Returns:
        The same ``parameters`` dict, whose entries have been rebound to the
        updated arrays (the previous arrays themselves are not modified).
    """
    n_layers = len(parameters) // 2

    for layer in range(1, n_layers + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters

In [228]:
# Define the multi-layer model using all the helper functions we wrote before
def L_layer_model(
        X, y, layers_dims, learning_rate=0.01, num_iterations=3000,
        print_cost=True, hidden_layers_activation_fn="sigmoid"):
    """Train the network with full-batch gradient descent and plot the loss.

    Args:
        X: training inputs passed to ``L_model_forward``.
        y: training targets passed to ``compute_cost`` / ``L_model_backward``.
        layers_dims: layer widths passed to ``initialize_parameters``.
        learning_rate: gradient-descent step size.
        num_iterations: number of forward/backward/update rounds.
        print_cost: when true, print the cost every 10th iteration.
        hidden_layers_activation_fn: activation name for the hidden layers.

    Returns:
        The trained parameters dict.

    Side effects:
        Seeds NumPy's global RNG with 1, optionally prints the cost, and
        creates a matplotlib figure with the recorded loss curve.
    """
    np.random.seed(1)

    # One interval drives both printing and recording so the log, the curve
    # and the axis label agree with each other.
    log_every = 10

    parameters = initialize_parameters(layers_dims)
    cost_list = []

    for i in range(num_iterations):
        # Forward pass: network output plus the caches needed for back-prop.
        AL, caches = L_model_forward(
            X, parameters, hidden_layers_activation_fn)

        # Cost of the parameters used in this forward pass.
        cost = compute_cost(AL, y)

        # Backward pass and gradient-descent step.
        grads = L_model_backward(AL, y, caches, hidden_layers_activation_fn)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and (i + 1) % log_every == 0:
            print(f"The cost after {i + 1} iterations is: {cost:.4f}")

        if i % log_every == 0:
            cost_list.append(cost)

    # Plot the recorded costs (one point per `log_every` iterations).
    plt.figure(figsize=(10, 6))
    plt.plot(cost_list)
    plt.xlabel("Iterations (per tens)")
    plt.ylabel("Loss")
    plt.title(f"Loss curve for the learning rate = {learning_rate}")

    return parameters


def accuracy(X, parameters, y, hidden_layers_activation_fn="sigmoid"):
    """Report the half mean-squared-error of the model on ``(X, y)``.

    Despite its name, this does not compute a classification accuracy: it
    evaluates ``sum((y - prediction) ** 2) / (2 * m)`` with
    ``m = y.shape[1]``, the same quantity ``compute_cost`` minimises, so lower
    is better and the value is not a percentage.

    Args:
        X: inputs passed to ``L_model_forward``.
        parameters: trained parameters dict.
        y: 2-D targets whose second axis indexes examples.
        hidden_layers_activation_fn: hidden-layer activation name; should match
            the one the model was trained with. Defaults to ``"sigmoid"``.

    Returns:
        A human-readable string containing the loss.
    """
    predictions, _ = L_model_forward(
        X, parameters, hidden_layers_activation_fn)
    half_mse = np.sum((y - predictions) ** 2) / (2 * np.shape(y)[1])
    return f"The loss (half mean squared error) is: {half_mse:.4f}."

In [None]:

# Load the train / test splits.
# NOTE(review): assumes each row of the X files is one example and the Y files
# hold one target per row — confirm against the CSVs.
X_train = np.loadtxt("X_train.csv")
Y_train = np.loadtxt("Y_train.csv")
X_test = np.loadtxt("X_test.csv")
Y_test = np.loadtxt("Y_test.csv")

print(np.shape(X_train), np.shape(Y_train))

# Standardise the features with training-set statistics only.
mean_x = np.mean(X_train, axis=0)
std_x = np.std(X_train, axis=0)
# A constant feature has zero standard deviation; dividing by it would fill
# that feature with NaN/inf and poison training, so leave such features
# centred but unscaled.
std_x = np.where(std_x == 0, 1.0, std_x)
xtrain = (X_train - mean_x) / std_x
# The model code expects examples along the second axis.
xtrain = np.transpose(xtrain)

# Standardise the targets the same way and turn them into a single row.
mean_y = np.mean(Y_train)
std_y = np.std(Y_train)
if std_y == 0:
    # Same zero-variance guard as for the features.
    std_y = 1.0
ytrain = (Y_train - mean_y) / std_y
ytrain = ytrain[:, np.newaxis]
ytrain = np.transpose(ytrain)

# The validation data reuses the training mean / std.
Xval = (X_test - mean_x) / std_x
Yval = (Y_test - mean_y) / std_y

xval = np.transpose(Xval)
yval = np.transpose(Yval[:, np.newaxis])
print(np.shape(xtrain), np.shape(ytrain))

# NN with sigmoid hidden activations and a linear output unit.
# (The `parameters_relu` name is kept as-is in case other cells refer to it.)
# Setting layers dims
layers_dims = [xtrain.shape[0], 5, 1]
parameters_relu = L_layer_model(
    xtrain, ytrain, layers_dims, learning_rate=0.3, num_iterations=300,
    hidden_layers_activation_fn="sigmoid")
# Report the validation loss returned by accuracy()
accuracy(xval, parameters_relu, yval)