# Building a Neural Network

Please note that I've already completed the "Neural Networks and Deep Learning" course by Andrew Ng, which is the course recommended in the class notes. Much of my code will look similar to what I wrote for that course's assignment. However, the activation functions and their use in backpropagation will be unique. I'm overbuilding in some ways, but this lets me reuse the implementation here to complete a stage of our research project.

In [1]:

from collections import deque

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split



# Feedforward Neural Network Implementation

The following functions implement a Feedforward Neural Network. These
functions are broken out into a few sections:

* Parameter Initialization
* Activation functions and their gradients
* Forward propagation
* Backward propagation

These functions will be used to complete parts **(b)**
and **(c)** of Homework 5.

## Test Cases

In [2]:
# Shared inputs for the sanity checks in the cells below.
test_seed = 3  # seed handed to initialize_parameters
test_layer_dims = [5, 4, 3]  # layer sizes, input layer first
test_activations = [
    "relu",
    "relu",
    "sigmoid",
]

In [3]:
def linear_forward_test_case():
    """Return a fixed ``(A, W, b)`` fixture for checking ``linear_forward``.

    A has shape (3, 2), W has shape (1, 3) and b has shape (1, 1).
    """
    activations = np.array([
        [1.62434536, -0.61175641],
        [-0.52817175, -1.07296862],
        [0.86540763, -2.3015387],
    ])
    weights = np.array([[1.74481176, -0.7612069, 0.3190391]])
    bias = np.array([[-0.24937038]])
    return activations, weights, bias

In [4]:
def linear_activation_forward_test_case():
    """Return a fixed ``(A_prev, W, b)`` fixture for ``linear_activation_forward``.

    A_prev has shape (3, 2), W has shape (1, 3) and b has shape (1, 1).
    """
    prev_activations = np.array([
        [-0.41675785, -0.05626683],
        [-2.1361961, 1.64027081],
        [-1.79343559, -0.84174737],
    ])
    weights = np.array([[0.50288142, -1.24528809, -1.05795222]])
    bias = np.array([[-0.90900761]])
    return prev_activations, weights, bias

In [5]:
def L_model_forward_test_case_2hidden():
    """Return a fixed ``(X, parameters)`` fixture for a three-layer forward pass.

    X has shape (5, 4). ``parameters`` holds W1 (4, 5), W2 (3, 4), W3 (1, 3)
    and the matching bias column vectors b1, b2, b3.
    """
    inputs = np.array([
        [-0.31178367, 0.72900392, 0.21782079, -0.8990918],
        [-2.48678065, 0.91325152, 1.12706373, -1.51409323],
        [1.63929108, -0.4298936, 2.63128056, 0.60182225],
        [-0.33588161, 1.23773784, 0.11112817, 0.12915125],
        [0.07612761, -0.15512816, 0.63422534, 0.810655],
    ])

    weights_1 = np.array([
        [0.35480861, 1.81259031, -1.3564758, -0.46363197, 0.82465384],
        [-1.17643148, 1.56448966, 0.71270509, -0.1810066, 0.53419953],
        [-0.58661296, -1.48185327, 0.85724762, 0.94309899, 0.11444143],
        [-0.02195668, -2.12714455, -0.83440747, -0.46550831, 0.23371059],
    ])
    weights_2 = np.array([
        [-0.12673638, -1.36861282, 1.21848065, -0.85750144],
        [-0.56147088, -1.0335199, 0.35877096, 1.07368134],
        [-0.37550472, 0.39636757, -0.47144628, 2.33660781],
    ])
    weights_3 = np.array([[0.9398248, 0.42628539, -0.75815703]])

    bias_1 = np.array([[1.38503523], [-0.51962709], [-0.78015214], [0.95560959]])
    bias_2 = np.array([[1.50278553], [-0.59545972], [0.52834106]])
    bias_3 = np.array([[-0.16236698]])

    parameters = {
        'W1': weights_1,
        'W2': weights_2,
        'W3': weights_3,
        'b1': bias_1,
        'b2': bias_2,
        'b3': bias_3,
    }
    return inputs, parameters

In [6]:
def compute_cost_test_case():
    """Return a fixed ``(Y, AL)`` fixture: three labels and three predictions."""
    labels = np.array([[1, 1, 1]])
    predictions = np.array([[0.8, 0.9, 0.4]])
    return labels, predictions

In [7]:
def linear_backward_test_case():
    """Return a fixed ``(dZ, linear_cache)`` fixture for ``linear_backward``.

    ``linear_cache`` is the tuple ``(A_prev, W, b)`` with shapes
    (3, 2), (1, 3) and (1, 1); dZ has shape (1, 2).
    """
    grad_z = np.array([[1.62434536, -0.61175641]])

    prev_activations = np.array([
        [-0.52817175, -1.07296862],
        [0.86540763, -2.3015387],
        [1.74481176, -0.7612069],
    ])
    weights = np.array([[0.3190391, -0.24937038, 1.46210794]])
    bias = np.array([[-2.06014071]])

    return grad_z, (prev_activations, weights, bias)

In [8]:
def linear_activation_backward_test_case():
    """Return a fixed ``(dAL, cache)`` fixture for ``linear_activation_backward``.

    ``cache`` is ``(linear_cache, activation_cache)`` where ``linear_cache``
    is ``(A_prev, W, b)`` and ``activation_cache`` is the pre-activation Z.
    """
    grad_a = np.array([[-0.41675785, -0.05626683]])

    prev_activations = np.array([
        [-2.1361961, 1.64027081],
        [-1.79343559, -0.84174737],
        [0.50288142, -1.24528809],
    ])
    weights = np.array([[-1.05795222, -0.90900761, 0.55145404]])
    bias = np.array([[2.29220801]])
    pre_activation = np.array([[0.04153939, -1.11792545]])

    linear_cache = (prev_activations, weights, bias)
    return grad_a, (linear_cache, pre_activation)

In [9]:
def L_model_backward_test_case():
    """Return a fixed ``(AL, Y, caches)`` fixture for a two-layer backward pass.

    ``caches`` is a tuple with one ``((A_prev, W, b), Z)`` entry per layer,
    first layer first.
    """
    predictions = np.array([[1.78862847, 0.43650985]])
    labels = np.array([[1, 0]])

    # Layer 1: 4 inputs -> 3 units.
    layer1_linear = (
        np.array([
            [0.09649747, -1.8634927],
            [-0.2773882, -0.35475898],
            [-0.08274148, -0.62700068],
            [-0.04381817, -0.47721803],
        ]),
        np.array([
            [-1.31386475, 0.88462238, 0.88131804, 1.70957306],
            [0.05003364, -0.40467741, -0.54535995, -1.54647732],
            [0.98236743, -1.10106763, -1.18504653, -0.2056499],
        ]),
        np.array([[1.48614836], [0.23671627], [-1.02378514]]),
    )
    layer1_z = np.array([
        [-0.7129932, 0.62524497],
        [-0.16051336, -0.76883635],
        [-0.23003072, 0.74505627],
    ])

    # Layer 2: 3 inputs -> 1 unit.
    layer2_linear = (
        np.array([
            [1.97611078, -1.24412333],
            [-0.62641691, -0.80376609],
            [-2.41908317, -0.92379202],
        ]),
        np.array([[-1.02387576, 1.12397796, -0.13191423]]),
        np.array([[-1.62328545]]),
    )
    layer2_z = np.array([[0.64667545, -0.35627076]])

    caches = ((layer1_linear, layer1_z), (layer2_linear, layer2_z))
    return predictions, labels, caches


In [10]:
def update_parameters_test_case():
    """Return a fixed ``(parameters, grads)`` fixture for ``update_parameters``.

    ``parameters`` holds W1 (3, 4), W2 (1, 3), b1 (3, 1), b2 (1, 1) and
    ``grads`` holds the matching dW1, dW2, db1, db2 arrays.
    """
    parameters = {
        'W1': np.array([
            [-0.41675785, -0.05626683, -2.1361961, 1.64027081],
            [-1.79343559, -0.84174737, 0.50288142, -1.24528809],
            [-1.05795222, -0.90900761, 0.55145404, 2.29220801],
        ]),
        'W2': np.array([[-0.5961597, -0.0191305, 1.17500122]]),
        'b1': np.array([[0.04153939], [-1.11792545], [0.53905832]]),
        'b2': np.array([[-0.74787095]]),
    }
    grads = {
        'dW1': np.array([
            [1.78862847, 0.43650985, 0.09649747, -1.8634927],
            [-0.2773882, -0.35475898, -0.08274148, -0.62700068],
            [-0.04381817, -0.47721803, -1.31386475, 0.88462238],
        ]),
        'dW2': np.array([[-0.40467741, -0.54535995, -1.54647732]]),
        'db1': np.array([[0.88131804], [1.70957306], [0.05003364]]),
        'db2': np.array([[0.98236743]]),
    }
    return parameters, grads

## Parameter Initialization

In [11]:
def initialize_parameters(layer_dims, seed=42):
    """ Initialize parameters for each layer in NN

    Weights are drawn from a standard normal distribution and scaled by
    0.01; biases start at zero. NumPy's global random generator is seeded
    with ``seed`` before drawing.

    :param layer_dims: dimensions for each layer, input layer first
    :param seed: int to set random seed

    :return: dict mapping "W<l>" to a weight matrix of shape
        (layer_dims[l], layer_dims[l-1]) and "b<l>" to a bias vector of
        shape (layer_dims[l], 1) for l = 1 .. len(layer_dims) - 1; the dict
        is empty when fewer than two layer sizes are given
    """
    parameters = {}
    np.random.seed(seed)

    for l in range(1, len(layer_dims)):
        n_out, n_in = layer_dims[l], layer_dims[l - 1]

        W = np.random.randn(n_out, n_in) * 0.01
        b = np.zeros((n_out, 1))

        # Validate every layer as it is created, not just the last one.
        assert W.shape == (n_out, n_in)
        assert b.shape == (n_out, 1)

        parameters['W' + str(l)] = W
        parameters['b' + str(l)] = b

    return parameters

In [12]:
# Build the parameters for the test architecture and show each array.
parameters = initialize_parameters(test_layer_dims, seed=test_seed)
print("W1 =", parameters["W1"])
print("b1 =", parameters["b1"])
print("W2 =", parameters["W2"])
print("b2 =", parameters["b2"])

W1 = [[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]]
b2 = [[0.]
 [0.]
 [0.]]


## Activation functions

* Sigmoid $F(Z)$, range $(0,1)$

    $$ \begin{align*} F(Z) &= \frac{1}{1 + e^{-Z}} = \sigma(Z) \\
     F^{\prime}(Z) &= F(Z)(1 - F(Z)) \end{align*}$$
    
* Tanh $F(Z)$, range $(-1, 1)$

    $$\begin{align*} F(Z) &= \frac{e^Z - e^{-Z}}{e^Z + e^{-Z}} = \tanh(Z) \\
       F^{\prime}(Z) &= 1 - F(Z)^2 \end{align*}$$
       
* Relu $F(Z)$, range $[0, +\infty)$

    $$\begin{align*} F(Z) &= \max(0,Z) \\
       F^{\prime}(Z) &= \begin{cases} 1, & \text{ if } Z > 0 \\
                         \text{undefined}, & \text{ if } Z = 0 \\
                         0, & \text{ if } Z < 0 \end{cases} 
                         \end{align*}$$
                         
Compute gradient for an activation function

$$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}) $$

TODO: verify gradient equation from notes

In [13]:
def sigmoid(Z):
    """ Sigmoid activation function

    Computed from ``exp(-|Z|)`` so the exponential never overflows:
    ``1 / (1 + exp(-|Z|))`` where Z >= 0 and
    ``exp(-|Z|) / (1 + exp(-|Z|))`` where Z < 0. Both are algebraically
    equal to ``1 / (1 + exp(-Z))``.

    :param Z: -- the input of the activation function

    :return: A -- sigmoid function applied element-wise to Z
    :return: cache -- Z itself, stored for the backward pass
    """
    Z_arr = np.asarray(Z)
    decay = np.exp(-np.abs(Z_arr))  # always in (0, 1], cannot overflow
    A = np.where(Z_arr >= 0, 1.0, decay) / (1.0 + decay)
    cache = Z
    return A, cache

In [14]:
# TODO: figure out how the activation_cache works

def sigmoid_gradient(dA, cache):
    """ Gradient of sigmoid function

    Applies ``dZ = dA * s * (1 - s)`` with ``s = sigmoid(Z)``. The sigmoid
    is evaluated from ``exp(-|Z|)`` so large-magnitude Z cannot overflow.

    :param dA: gradient of the cost with respect to the activation output
    :param cache: the pre-activation Z stored during the forward pass

    :return: dZ -- gradient of the cost with respect to Z, same shape as Z
    """

    Z = cache
    decay = np.exp(-np.abs(Z))  # always in (0, 1], cannot overflow
    s = np.where(Z >= 0, 1.0, decay) / (1.0 + decay)
    dZ = dA * s * (1 - s)

    assert (dZ.shape == Z.shape)

    return dZ

In [15]:
def relu(Z):
    """ Rectified linear unit applied element-wise

    :param Z: -- pre-activation values (an array exposing ``shape``)

    :return: tuple ``(A, cache)`` -- A is Z with every negative entry
        replaced by zero, cache is Z itself for the backward pass
    """
    rectified = np.maximum(Z, 0)
    assert rectified.shape == Z.shape
    return rectified, Z

In [16]:
def relu_gradient(dA, cache):
    """ Gradient of the relu function

    :param dA: gradient of the cost with respect to the activation output
    :param cache: the pre-activation Z stored during the forward pass

    :return: a copy of dA with entries zeroed wherever Z <= 0
    """
    pre_activation = cache

    # Work on a copy so the caller's dA is left untouched.
    grad = np.array(dA, copy=True)
    assert grad.shape == pre_activation.shape

    inactive = pre_activation <= 0
    grad[inactive] = 0
    return grad


In [17]:
def tanh(Z):
    """ Tanh activation function

    Uses ``np.tanh`` rather than the explicit exponential ratio, which
    evaluates to ``inf / inf = NaN`` once ``exp(|Z|)`` overflows.

    :param Z: -- the input of the activation function

    :return: A -- tanh function applied element-wise to Z
    :return: cache -- Z itself, stored for the backward pass
    """
    A = np.tanh(Z)
    cache = Z
    return A, cache

In [18]:
def tanh_gradient(dA, cache):
    """ Gradient of the tanh function

    Applies ``dZ = dA * (1 - tanh(Z)^2)``.

    :param dA: gradient of the cost with respect to the activation output
    :param cache: the pre-activation Z stored during the forward pass

    :return: dZ -- gradient of the cost with respect to Z, same shape as Z
    """

    Z = cache

    s = np.tanh(Z)
    dZ = dA * (1 - np.power(s, 2))

    assert (dZ.shape == Z.shape)

    return dZ
        

## Forward propagation

Implementing forward propagation

In [19]:
def linear_forward(A, W, b):
    """ Compute the affine step ``Z = W . A + b`` of one layer

    :param A: activation from previous layer (or input data)
    :param W: weights matrix
    :param b: bias vector

    :return: tuple ``(Z, cache)`` -- Z is the input of the activation
        function and cache is ``(A, W, b)`` for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    # One row per unit of this layer, one column per column of A.
    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape

    return pre_activation, (A, W, b)

In [20]:
# Run the linear step on the fixture and show the pre-activation.
A, W, b = linear_forward_test_case()
Z, linear_cache = linear_forward(A, W, b)

print("Z =", Z)

Z = [[ 3.26295336 -1.23429988]]


In [21]:
def linear_activation_forward(A_prev, W, b, activation):
    """ Forward propagation for the LINEAR->ACTIVATION layer

    :param A_prev: activation from previous layer (or input data)
    :param W: weights matrix
    :param b: bias vector
    :param activation: activation function name of "sigmoid",
                "relu", or "tanh"

    :return: A -- output of the activation function
    :return: cache -- contains "linear_cache" and "activation_cache"

    :raises TypeError: if ``activation`` is not one of the supported names
    """
    # Reject an unknown activation before doing any numerical work.
    if activation not in ("relu", "sigmoid", "tanh"):
        raise TypeError("Activation does not exist: {}".format(activation))

    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "relu":
        A, activation_cache = relu(Z)
    elif activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = tanh(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return (A, cache)
    

In [22]:
# Run the same fixture through each supported activation.
A_prev, W, b = linear_activation_forward_test_case()

A, linear_activation_cache = linear_activation_forward(
    A_prev, W, b, activation="sigmoid")
print("With sigmoid: A =", A)

A, linear_activation_cache = linear_activation_forward(
    A_prev, W, b, activation="relu")
print("With ReLU: A =", A)

A, linear_activation_cache = linear_activation_forward(
    A_prev, W, b, activation="tanh")
print("With tanh: A =", A)

With sigmoid: A = [[0.96890023 0.11013289]]
With ReLU: A = [[3.43896134 0.        ]]
With tanh: A = [[ 0.99794156 -0.96982745]]


In [23]:
def L_model_forward(X, parameters, activations):
    """ Forward propagation given a model specification

    Runs LINEAR->ACTIVATION for every layer in order, feeding each layer's
    output into the next.

    :param X: input data, one column per example
    :param parameters: dict holding "W<l>" and "b<l>" for l = 1..L
    :param activations: iterable of L activation names, first layer first
        (list, tuple or deque); it is read, never modified

    :return: AL -- output of the last layer, shape (1, X.shape[1])
    :return: caches -- list with one cache per layer, as returned by
        ``linear_activation_forward``
    """
    L = len(parameters) // 2  # Number of layers in the neural network

    # Snapshot the names so the caller's container is left untouched.
    layer_activations = list(activations)
    assert L >= 1, "parameters must describe at least one layer"
    assert L == len(layer_activations)

    caches = []
    A = X
    for l, activation in enumerate(layer_activations, start=1):
        A, cache = linear_activation_forward(A, parameters["W" + str(l)],
                                             parameters["b" + str(l)],
                                             activation=activation)
        caches.append(cache)

    AL = A
    assert (AL.shape == (1, X.shape[1]))

    return (AL, caches)


In [24]:
# Forward pass through the three-layer fixture.
X, parameters = L_model_forward_test_case_2hidden()
activations = deque(["relu", "relu", "sigmoid"])
AL, caches = L_model_forward(X, parameters, activations)

print("AL =", AL)
print("Length of caches list =", len(caches))

AL = [[0.03921668 0.70498921 0.19734387 0.04728177]]
Length of caches list = 3


## Cost Function

The cost is the cross-entropy cost $J$, computed with the following formula: $$J = -\frac{1}{m} \sum\limits_{i = 1}^{m} (y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right))$$

TODO: May need a different cost function depending on the notes

In [25]:
def compute_cost(AL, Y):
    """ Cross entropy cost J

    :param AL: predicted probabilities from the last layer
    :param Y: true labels, same shape as AL

    :return: the cost as a zero-dimensional array
    """
    n_examples = Y.shape[1]

    # Per-example log-likelihood of the labels under the predictions.
    positive_term = np.multiply(Y, np.log(AL))
    negative_term = np.multiply((1 - Y), np.log(1 - AL))
    total = np.sum(positive_term + negative_term, axis=1)

    cost = np.squeeze(-total / n_examples)
    assert cost.shape == ()

    return cost


In [26]:
# Evaluate the cost on the fixture.
Y, AL = compute_cost_test_case()

print("cost =", compute_cost(AL, Y))

cost = 0.414931599615397


## Backward propagation

Implementing backward propagation

In [27]:
def linear_backward(dZ, cache):
    """ Linear portion of backward propagation for a single layer

    :param dZ: gradient of the cost with respect to this layer's Z
    :param cache: tuple ``(A_prev, W, b)`` stored by ``linear_forward``

    :return: tuple ``(dA_prev, dW, db)`` with the same shapes as
        A_prev, W and b respectively
    """
    prev_activations, weights, bias = cache

    # Average the per-example contributions over the columns of A_prev.
    scale = 1 / prev_activations.shape[1]
    grad_W = scale * np.dot(dZ, prev_activations.T)
    grad_b = scale * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(weights.T, dZ)

    assert grad_A_prev.shape == prev_activations.shape
    assert grad_W.shape == weights.shape
    assert grad_b.shape == bias.shape

    return grad_A_prev, grad_W, grad_b


In [28]:
# Load the fixture and run the linear backward step on it.
dZ, linear_cache = linear_backward_test_case()
dA_prev, dW, db = linear_backward(dZ, linear_cache)

print("dA_prev =", dA_prev)
print("dW =", dW)
print("db =", db)

dA_prev = [[ 0.51822968 -0.19517421]
 [-0.40506362  0.15255393]
 [ 2.37496825 -0.8944539 ]]
dW = [[-0.10076895  1.40685096  1.64992504]]
db = [[0.50629448]]


In [29]:
def linear_activation_backward(dA, cache, activation):
    """ Backward propagation for the LINEAR->ACTIVATION layer

    :param dA: gradient of the cost with respect to this layer's output
    :param cache: tuple ``(linear_cache, activation_cache)`` stored during
        the forward pass
    :param activation: activation function name of "sigmoid",
                "relu", or "tanh"

    :return: tuple ``(dA_prev, dW, db)`` as produced by ``linear_backward``

    :raises TypeError: if ``activation`` is not one of the supported names
    """

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_gradient(dA, activation_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_gradient(dA, activation_cache)

    elif activation == "tanh":
        dZ = tanh_gradient(dA, activation_cache)

    else:
        # Fail with a clear message instead of leaving dZ undefined.
        raise TypeError("Activation does not exist: {}".format(activation))

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return (dA_prev, dW, db)

In [30]:
# Backward step on the fixture for the sigmoid and relu activations.
dAL, linear_activation_cache = linear_activation_backward_test_case()

dA_prev, dW, db = linear_activation_backward(
    dAL, linear_activation_cache, activation="sigmoid")
print("sigmoid:")
print("dA_prev =", dA_prev)
print("dW =", dW)
print("db =", db, end="\n\n")

dA_prev, dW, db = linear_activation_backward(
    dAL, linear_activation_cache, activation="relu")
print("relu:")
print("dA_prev =", dA_prev)
print("dW =", dW)
print("db =", db, end="\n\n")

#dA_prev, dW, db = linear_activation_backward(dAL, linear_activation_cache, activation = "tanh")
#print ("tanh:")
#print ("dA_prev = "+ str(dA_prev))
#print ("dW = " + str(dW))
#print ("db = " + str(db))

# TODO: fix tanh activation function (minor)

sigmoid:
dA_prev = [[ 0.11017994  0.0110534 ]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576155]]
dW = [[ 0.10266786  0.09778551 -0.01968084]]
db = [[-0.05729622]]

relu:
dA_prev = [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]
dW = [[ 0.44513825  0.37371418 -0.10478989]]
db = [[-0.20837892]]



In [31]:
def L_model_backward(AL, Y, caches, activations):
    """ Implement backward propagation for the specified model

    Starts from the derivative of the cross-entropy cost with respect to AL
    and walks the layers from last to first, using each layer's own
    activation name to pick the matching gradient.

    :param AL: output of the forward pass
    :param Y: true labels; reshaped to the shape of AL
    :param caches: one cache per layer, first layer first, as produced by
        ``linear_activation_forward``
    :param activations: iterable of activation names, one per layer, first
        layer first; it is read, never modified

    :return: grads -- dict with "dA<l-1>", "dW<l>" and "db<l>" for every
        layer l = 1..L
    """
    grads = {}
    L = len(caches)

    # Snapshot the names so the caller's container is left untouched.
    layer_activations = list(activations)
    assert L == len(layer_activations)

    Y = Y.reshape(AL.shape)

    # Initialize the backpropagation: derivative of the cost w.r.t. AL

    dA = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Loop from l=L-1 to l=0, passing each layer's dA_prev to the one below

    for l in reversed(range(L)):

        dA_prev_temp, dW_temp, db_temp = \
        linear_activation_backward(dA, caches[l], layer_activations[l])
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

        dA = dA_prev_temp

    return grads
    

In [32]:
# Backward pass through the two-layer fixture.
AL, Y_assess, caches = L_model_backward_test_case()
activations = ["relu", "sigmoid"]

grads = L_model_backward(AL=AL, Y=Y_assess, caches=caches,
                         activations=activations)

print(grads)

{'dA1': array([[ 0.12913162, -0.44014127],
       [-0.14175655,  0.48317296],
       [ 0.01663708, -0.05670697]]), 'dW2': array([[-0.39202432, -0.13325855, -0.04601089]]), 'db2': array([[0.15187861]]), 'dA0': array([[ 0.        ,  0.52257901],
       [ 0.        , -0.3269206 ],
       [ 0.        , -0.32070404],
       [ 0.        , -0.74079187]]), 'dW1': array([[0.41010002, 0.07807203, 0.13798444, 0.10502167],
       [0.        , 0.        , 0.        , 0.        ],
       [0.05283652, 0.01005865, 0.01777766, 0.0135308 ]]), 'db1': array([[-0.22007063],
       [ 0.        ],
       [-0.02835349]])}


## Update parameters

Update parameters of the model using gradient descent.

In [33]:
def update_parameters(parameters, grads, learning_rate):
    """ Apply one gradient-descent step to every weight and bias

    :param parameters: dict holding "W<l>" and "b<l>" for each layer; its
        entries are rebound to the updated arrays
    :param grads: dict holding the matching "dW<l>" and "db<l>" gradients
    :param learning_rate: step size multiplying each gradient

    :return: the same ``parameters`` dict with updated entries
    """
    num_layers = len(parameters) // 2  # one W and one b per layer

    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            step = learning_rate * grads["d" + key]
            parameters[key] = parameters[key] - step

    return parameters
    

In [34]:
# Apply one gradient-descent step to the fixture and show the result.
parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads, 0.1)

print("W1 =", parameters["W1"])
print("b1 =", parameters["b1"])
print("W2 =", parameters["W2"])
print("b2 =", parameters["b2"])

W1 = [[-0.5956207  -0.09991781 -2.14584585  1.82662008]
 [-1.76569677 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284051  2.20374577]]
b1 = [[-0.04659241]
 [-1.28888276]
 [ 0.53405496]]
W2 = [[-0.55569196  0.0354055   1.32964895]]
b2 = [[-0.84610769]]


In [35]:
def train_model(X, Y, config):
    """ Run an L-Layer neural network using config file

    Trains with full-batch gradient descent for a fixed number of
    iterations.

    :param X: input data, one column per example
    :param Y: true labels
    :param config: dict read with the keys "activations", "layers_dims",
        "learning_rate", "num_iterations", "print_cost" and "random_seed"

    :return: parameters -- the trained weights and biases
    :return: costs -- list of ``(iteration, cost)`` pairs recorded every 100
        iterations plus the final iteration; empty unless "print_cost" is
        truthy
    """

    # Unpack configuration file

    activations = config.get("activations")
    layers_dims = config.get("layers_dims")
    learning_rate = config.get("learning_rate")
    num_iterations = config.get("num_iterations")
    print_cost = config.get("print_cost")
    random_seed = config.get("random_seed")

    np.random.seed(random_seed)
    costs = []  # keep track of costs

    # Initialize model. The configured seed has to be forwarded, because
    # initialize_parameters reseeds with its own default otherwise.

    if random_seed is None:
        parameters = initialize_parameters(layers_dims)
    else:
        parameters = initialize_parameters(layers_dims, seed=random_seed)

    cost = None
    for i in range(num_iterations):

        # Forward propagation (a fresh deque each time: the forward pass
        # consumes the one it is given)

        AL, caches = L_model_forward(X, parameters, deque(activations))

        # Compute cost

        cost = compute_cost(AL, Y)

        # Backward propagation

        grads = L_model_backward(AL, Y, caches, deque(activations))

        # Update parameters

        parameters = update_parameters(parameters, grads, learning_rate)

        # Track cost if specified

        if print_cost and i % 100 == 0:
            costs.append((i, cost))

    # Record the final iteration once, and only if training actually ran.
    if print_cost and num_iterations > 0:
        last_iteration = num_iterations - 1
        if not costs or costs[-1][0] != last_iteration:
            costs.append((last_iteration, cost))

    return parameters, costs
        

In [36]:
def predict(X, y, parameters, activations):
    """ Prediction using params from neural network

    :param X: input data, one column per example
    :param y: true labels (accepted but not used)
    :param parameters: trained weights and biases
    :param activations: activation names, one per layer

    :return: array of shape (1, X.shape[1]) holding 1.0 where the predicted
        probability exceeds 0.5 and 0.0 elsewhere
    """
    m = X.shape[1]

    # Forward propagation on a private copy of the activation names

    probas, _ = L_model_forward(X, parameters, deque(activations.copy()))

    # Convert probas to 0/1 predictions

    p = np.zeros((1, m))
    p[0, probas[0, :] > 0.5] = 1

    return p
    

In [37]:
def report_model(meta_config:dict):
    """ Generate a model report based on meta_config file

    Trains one model per learning rate in
    ``np.arange(learning_min, learning_max, learning_step)`` and collects
    its costs, parameters and predictions.

    :param meta_config: dict read with the keys "config_template",
        "learning_range", "learning_step", "X_train", "y_train", "X_test",
        "y_test", "X_validate" and "y_validate"

    :return: list with one dict per learning rate holding "learning_rate",
        "costs", "train_parameters", "train_score", "test_score" and
        "validation_score"; the three "*_score" entries are the arrays
        returned by ``predict``
    """
    template = meta_config.get("config_template")
    step = meta_config.get("learning_step")
    X_train, y_train = meta_config.get("X_train"), meta_config.get("y_train")
    X_test, y_test = meta_config.get("X_test"), meta_config.get("y_test")
    X_validate = meta_config.get("X_validate")
    y_validate = meta_config.get("y_validate")

    lower, upper = meta_config.get("learning_range")

    reports = []
    for index, learning_rate in enumerate(np.arange(lower, upper, step)):

        # Each run gets its own copy of the template with its rate filled in
        config = template.copy()
        config["learning_rate"] = learning_rate
        activations = config.get("activations")

        parameters, costs = train_model(X_train, y_train, config)

        train_score = predict(X_train, y_train, parameters, activations)
        test_score = predict(X_test, y_test, parameters, activations)
        validation_score = predict(X_validate, y_validate,
                                   parameters, activations)

        reports.append({
            "learning_rate": learning_rate,
            "costs": costs,
            "train_parameters": parameters,
            "train_score": train_score,
            "test_score": test_score,
            "validation_score": validation_score,
        })

        if index % 100 == 0:
            print("Computing the {}th model".format(index))

    return reports
        
    

# Running the Model

In [41]:
#X_train, y_train

In [42]:
# Need to figure out config for the data you have available
#
# Training configuration read by train_model via config.get(...).
# NOTE(review): random_state is not defined in the cells shown here;
# presumably it is set in an earlier cell -- confirm.

config = {
    "activations": ["relu", "sigmoid"],
    "layers_dims" : [2, 2, 1], # 2 inputs, 2 hidden units, 1 output (2-layer model)
    "learning_rate": 0.0075,
    "num_iterations" : 3000,
    "print_cost": True,
    "random_seed": random_state,
}

In [43]:
# Train on the training split. The data is transposed before training;
# presumably X_train / y_train are pandas objects with one row per example -- confirm.
parameters, costs = train_model(X_train.T.values, y_train.T.values, config) 

In [44]:
# predict returns the 0/1 predictions for the test split, not an accuracy value.
score = predict(X_test.T.values, y_test.T.values, parameters, config.get("activations"))

In [47]:
# Settings for the learning-rate sweep run by report_model.
# learning_range and learning_step are passed to np.arange, so the upper
# bound of the range is excluded.
# NOTE(review): X_val / y_val are not defined in the cells shown here;
# presumably they come from an earlier split -- confirm.
meta_config = {
    "config_template" : config,
    "learning_range": (0.0005, 0.01),
    "learning_step": 0.0005,
    "X_train": X_train.T.values,
    "y_train": y_train.T.values,
    "X_test": X_test.T.values,
    "y_test": y_test.T.values,
    "X_validate": X_val.T.values,
    "y_validate": y_val.T.values,
}

In [48]:
# Train and evaluate one model per learning rate in the sweep.
reports_partb = report_model(meta_config)

Computing the 0th model


In [49]:
# Number of reports returned, one per learning rate tried.
len(reports_partb)

19