In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt
%matplotlib inline
import platform

In [6]:
if True:
    import sys
    system = platform.system()
    if system == "Windows":
        sys.path.insert(0, 'C:/Users/Lorenzo/Desktop/Workspace/Github/Project-5/src')
    elif system == "Darwin":
        sys.path.insert(0, '/Users/lorenzogurrola/workspace/github.com/LorenzoGurrola/Project-5/src')
    from data_loader import prepare_train, prepare_test
    import utils

In [7]:
def load_data():
    data = pd.read_csv('../framingham.csv')
    data = data.dropna()
    train, test = train_test_split(data, train_size=0.85, random_state=10)
    X_train, y_train, scalers = prepare_train(train)
    X_test, y_test = prepare_test(test, scalers)
    return X_train, y_train, X_test, y_test

In [8]:
def initialize_params(H):
    assert H[-1] == 1
    params = {}
    param_count = 0
    msg = ''
    for l in range(1, len(H)):
        #If L=3, this will go l=1,2,3
        params['W' + str(l)] = np.random.randn(H[l-1], H[l]) * 0.1
        params['b' + str(l)] = np.zeros((1, H[l]))
        weight_count = H[l-1] * H[l]
        bias_count = H[l]
        msg += f'\nlayer {l}: {H[l]} hidden units, {weight_count + bias_count} params, {weight_count} weights, {bias_count} biases'
        param_count += weight_count + bias_count

    print(f'initialized {param_count} total trainable params over {len(H) -1} layers \n' + msg)
    return params

In [9]:
def relu(z):
    return np.maximum(0, z)

In [10]:
def sigmoid(z):
    a = 1/(1 + np.exp(-z))
    return a

In [11]:
def forward(X, params, H):
    activations = {'A0':X}
    inter_values = {}
    for l in range(1, len(H)):
        inter_values['Z' + str(l)] = activations['A' + str(l-1)] @ params['W' + str(l)] + params['b' + str(l)]
        print(f'model Z{str(l)} {inter_values['Z' + str(l)]}')
        if(l < len(H) - 1): #ReLU until last layer, then sigmoid
            activations['A' + str(l)] = relu(inter_values['Z' + str(l)])
            print(f'model activations A{str(l)} {activations['A' + str(l)]}')
        else:
            activations['A' + str(l)] = sigmoid(inter_values['Z' + str(l)])
            print(f'model activations A{str(l)} {activations['A' + str(l)]}')

    return activations, inter_values

In [12]:
def calculate_cost(yhat, y):
    m = y.shape[0]
    losses = y * np.log(yhat) + (1 - y) * np.log(1 - yhat)
    cost = -np.sum(losses, axis=0, keepdims=True)/m
    return cost

In [None]:
def backward(y, activations, inter_values, params, H):
    m = y.shape[0]
    L = len(H) - 1
    yhat = activations['A' + str(L)]
    derivatives = {}
    grads = {}
    activations[f'dc_dA{L}'] = (-1/m) * ((y/yhat) - ((1 - y)/(1 - yhat)))
    for layer in range(L, 0, -1):
        l = str(layer)
        if(layer == L):
            derivatives[f'dA{l}_dZ{l}'] = yhat * (1 - yhat)
        else:
            derivatives[f'dA{l}_dZ{l}'] = np.where(activations[f'Z{l}'] >= 0, 1, 0)
        derivatives[f'dc_dZ{l}'] = derivatives[f'dA{l}_dZ{l}'] * activations[f'dc_dA{l}']
        grads[f'db{l}'] = np.sum(derivatives[f'dc_dZ{l}'], axis=0, keepdims=True)
        grads[f'dZ{l}_dW{l}'] = np.matmul(activations[f'A{l}'].T, derivatives[f'dc_dZ{l}'])
        
    return grads, derivatives

In [61]:
X_train, y_train, X_test, y_test = load_data()

In [11]:
def backward(y, yhat, values, X, params):
    m = y.shape[0]
    A1 = values['A1']
    Z1 = values['Z1']
    w2 = params['w2']
    dc_dyhat = (-1/m) * ((y/yhat) - ((1 - y)/(1 - yhat)))
    
    dyhat_dz2 = yhat * (1 - yhat)
    dc_dz2 = dc_dyhat * dyhat_dz2
    dc_db2 = np.sum(dc_dz2, axis=0, keepdims=True)
    dc_dw2 = np.matmul(A1.T, dc_dz2)

    dc_dA1 = np.matmul(dc_dz2, w2.T)
    dA1_dZ1 = np.where(Z1 >= 0, 1, 0)
    dc_dZ1 = dc_dA1 * dA1_dZ1
    dc_db1 = np.sum(dc_dZ1, axis=0, keepdims=True)
    dc_dW1 = np.matmul(X.T, dc_dZ1)

    grads = {'dW1':dc_dW1, 'db1':dc_db1, 'dw2':dc_dw2, 'db2':dc_db2}
    return grads

In [28]:
def update_params(params, grads, lr):
    W1 = params['W1']
    b1 = params['b1']
    w2 = params['w2']
    b2 = params['b2']

    dW1 = grads['dW1']
    db1 = grads['db1']
    dw2 = grads['dw2']
    db2 = grads['db2']

    W1 = W1 - lr * dW1
    b1 = b1 - lr * db1
    w2 = w2 - lr * dw2
    b2 = b2 - lr * db2

    new_params = {'W1':W1, 'b1':b1, 'w2':w2, 'b2':b2}
    return new_params

In [29]:
def save_params(model, params):
    path = '../src/models/' + model
    for p in params:
        param_path = path + '/' + p + '.npy'
        np.save(param_path, params[p])

In [34]:
def load_params(model, params):
    path = '../src/models/' + model
    if not os.path.exists(path):
        os.makedirs(path)
        print(f'created new model {model}')
        return params
    else:
        for p in params:
            try: 
                param_path = path + '/' + p + '.npy'
                params[p] = np.load(param_path)
            except FileNotFoundError:
                pass

    return params

In [39]:
def train_loop(model, epochs, lr, X_train, y_train, h):
    params = initialize_params(X_train.shape[1], h)
    params = load_params(model, params)
    for epoch in range(epochs):
        yhat, values = forward(X_train, params)
        cost = calculate_cost(yhat, y_train)
        grads = backward(y_train, yhat, values, X_train, params)
        params = update_params(params, grads, lr)
        print(f'epoch {epoch} cost {cost}')
    save_params(model, params)
    print(f'saved params in model {model}')

In [40]:
def predict(model, X_test, y_test, h):
    params = initialize_params(X_test.shape[1], h)
    params = load_params(model, params)
    yhat, values = forward(X_test, params)
    ypred = (yhat > 0.5).astype(int)
    match = (ypred == y_test)
    score = np.sum(match == 1)/match.size
    return score

In [54]:
model = 'm4'
epochs = 10000
lr = 0.1
hidden_units = 100


train_loop(model, epochs, lr, X_train, y_train, hidden_units)

initialized 2001 total trainable params with 100 hidden units and 18 input features
epoch 0 cost [[0.24740996]]
epoch 1 cost [[0.24739598]]
epoch 2 cost [[0.24738246]]
epoch 3 cost [[0.24736869]]
epoch 4 cost [[0.2473554]]
epoch 5 cost [[0.24734147]]
epoch 6 cost [[0.24732789]]
epoch 7 cost [[0.24731414]]
epoch 8 cost [[0.24730038]]
epoch 9 cost [[0.24728679]]
epoch 10 cost [[0.24727277]]
epoch 11 cost [[0.24725942]]
epoch 12 cost [[0.24724574]]
epoch 13 cost [[0.24723224]]
epoch 14 cost [[0.24721829]]
epoch 15 cost [[0.24720479]]
epoch 16 cost [[0.24719096]]
epoch 17 cost [[0.24717751]]
epoch 18 cost [[0.24716373]]
epoch 19 cost [[0.24715036]]
epoch 20 cost [[0.2471367]]
epoch 21 cost [[0.24712322]]
epoch 22 cost [[0.24710977]]
epoch 23 cost [[0.24709627]]
epoch 24 cost [[0.2470822]]
epoch 25 cost [[0.24706898]]
epoch 26 cost [[0.24705532]]
epoch 27 cost [[0.24704186]]
epoch 28 cost [[0.24702803]]
epoch 29 cost [[0.24701491]]
epoch 30 cost [[0.2470008]]
epoch 31 cost [[0.2469876]]
epo

In [56]:
model = 'm4'
hidden_units = 100

score = predict(model, X_train, y_train, hidden_units)
score

initialized 2001 total trainable params with 100 hidden units and 18 input features


0.953331187640811