In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt
%matplotlib inline
import platform

In [2]:
if True:
    import sys
    system = platform.system()
    if system == "Windows":
        sys.path.insert(0, 'C:/Users/Lorenzo/Desktop/Workspace/Github/Project-4/src')
    elif system == "Darwin":
        sys.path.insert(0, '/Users/lorenzogurrola/workspace/github.com/LorenzoGurrola/Project-4/src')
    from data_loader import prepare_train, prepare_test

In [3]:
def load_data():
    data = pd.read_csv('../framingham.csv')
    data = data.dropna()
    train, test = train_test_split(data, train_size=0.85, random_state=10)
    X_train, y_train, scalers = prepare_train(train)
    X_test, y_test = prepare_test(test, scalers)
    return X_train, y_train, X_test, y_test

In [4]:
X_train, y_train, X_test, y_test = load_data()
X_train.shape

(3107, 18)

In [5]:
X_train

array([[ 1.        ,  1.45632121,  1.        , ...,  0.        ,
         0.        ,  1.        ],
       [ 1.        ,  1.45632121,  1.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 1.        ,  0.8710576 ,  1.        , ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 0.        ,  0.51989944,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        , -0.29946961,  1.        , ...,  1.        ,
         0.        ,  0.        ],
       [ 1.        , -1.46999683,  1.        , ...,  0.        ,
         0.        ,  1.        ]])

In [21]:
def initialize_params(n, h):
    W1 = np.random.randn(n, h) * 0.1
    b1 = np.zeros((1, h))
    w2 = np.random.randn(h, 1) * 0.1
    b2 = np.zeros((1, 1))
    params = {'W1':W1, 'b1':b1, 'w2':w2, 'b2':b2}
    param_count = n * h + 2 * h + 1
    print(f'initialized {param_count} total trainable params with {h} hidden units and {n} input features')
    return params

In [22]:
def relu(z):
    return np.maximum(0, z)

In [23]:
def sigmoid(z):
    a = 1/(1 + np.exp(-z))
    return a

In [24]:
def forward(X, params):
    W1 = params['W1']
    b1 = params['b1']
    w2 = params['w2']
    b2 = params['b2']

    Z1 = X @ W1 + b1
    A1 = relu(Z1)

    inter_vals = {'Z1':Z1, 'A1':A1}

    z2 = A1 @ w2 + b2
    a2 = sigmoid(z2)

    return a2, inter_vals

In [25]:
def calculate_cost(yhat, y):
    m = y.shape[0]
    losses = y * np.log(yhat) + (1 - y) * np.log(1 - yhat)
    cost = -np.sum(losses, axis=0, keepdims=True)/m
    return cost

In [32]:
def backward(y, yhat, inter_vals, X, params):
    m = y.shape[0]
    A1 = inter_vals['A1']
    Z1 = inter_vals['Z1']
    w2 = params['w2']
    dc_dyhat = (-1/m) * ((y/yhat) - ((1 - y)/(1 - yhat)))
    dyhat_dz2 = yhat * (1 - yhat)
    dc_dz2 = dc_dyhat * dyhat_dz2
    dc_db2 = np.sum(dc_dz2, axis=0, keepdims=True)
    dc_dw2 = np.matmul(A1.T, dc_dz2)

    dc_dA1 = np.matmul(dc_dz2, w2)
    dA1_Z1 = np.where(Z1 >= 0, 1, 0)
    dc_dZ1 = np.matmul(dc_dA1, dA1_Z1)
    dc_db1 = np.sum(dc_dZ1, axis=0, keepdims=True)
    dc_dW1 = np.matmul(X.T, dc_dZ1)

    grads = {'dW1':dc_dW1, 'db1':dc_db1, 'dW2':dc_dw2, 'db2':dc_db2}
    

In [33]:
n = 18
h = 3

params = initialize_params(n, h)

initialized 61 total trainable params with 3 hidden units and 18 input features


In [34]:
yhat, inter_vals = forward(X_train, params)

In [35]:
cost = calculate_cost(yhat, y_train)

In [36]:
grads = backward(y_train, yhat, inter_vals, X_train, params)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 1)