In [73]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import os
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats 

In [74]:
def load_data(n_samples=100, train_size=0.8, random_state=10):
    """Build a synthetic two-feature binary dataset and split it.

    Both features are evenly spaced ramps (x1 over [1, 9], x2 over [3, 11]),
    so x2 is always x1 + 2. A sample is labelled 1 only when x1 > 6 and
    x2 > 9 hold together.

    Args:
        n_samples: Number of points generated along each ramp.
        train_size: Fraction (or count) of samples in the training split,
            forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The X arrays have two
        columns; the y arrays are reshaped to column vectors of shape (m, 1).
    """
    x1 = np.linspace(1, 9, n_samples)
    x2 = np.linspace(3, 11, n_samples)
    # Positive class requires both thresholds to be exceeded.
    y = ((x1 > 6) & (x2 > 9)).astype(int)
    X = np.stack((x1, x2), axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state
    )
    # Column vectors so the labels line up with (m, 1) predictions.
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)
    return X_train, X_test, y_train, y_test

In [None]:
def load_data2():
    """Placeholder for a second dataset loader; currently does nothing."""
    return None

In [75]:
def initialize_params(X):
    """Create starting parameters for a single-output linear model.

    Args:
        X: 2-D array; only its column count (``X.shape[1]``) is used.

    Returns:
        Dict with ``'w'`` of shape (n_features, 1), drawn from a standard
        normal via NumPy's global RNG and scaled by 0.1, and ``'b'`` as a
        (1, 1) array of zeros.
    """
    n_features = X.shape[1]
    return {
        'w': 0.1 * np.random.randn(n_features, 1),
        'b': np.zeros((1, 1)),
    }

In [76]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    Evaluated as ``exp(-log(1 + exp(-z)))`` with ``np.logaddexp`` so that
    large negative inputs do not overflow ``exp`` (the direct formula emits an
    overflow RuntimeWarning there).

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)).
    return np.exp(-np.logaddexp(0, -z))

In [77]:
def forward_prop(X, params):
    """Run the forward pass of the logistic model.

    Args:
        X: Input matrix whose column count matches the rows of ``params['w']``.
        params: Dict holding the weight matrix under ``'w'`` and the bias
            under ``'b'``.

    Returns:
        ``sigmoid(X @ w + b)``, the predicted probabilities.
    """
    weights, bias = params['w'], params['b']
    logits = X @ weights + bias
    return sigmoid(logits)

In [78]:
def calculate_cost(yhat, y, eps=1e-12):
    """Mean binary cross-entropy between predictions and labels.

    Args:
        yhat: Predicted probabilities, same shape as ``y``.
        y: Ground-truth labels (0 or 1); ``y.shape[0]`` is the sample count.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithm so that saturated outputs (exactly 0 or 1) give a
            finite cost instead of NaN or inf.

    Returns:
        Array of shape (1, n_outputs) with the cost summed over samples and
        divided by the sample count.
    """
    m = y.shape[0]
    # Guard against log(0): 0 * log(0) evaluates to NaN in floating point.
    yhat = np.clip(yhat, eps, 1 - eps)
    log_likelihood = y * np.log(yhat) + (1 - y) * np.log(1 - yhat)
    cost = -np.sum(log_likelihood, axis=0, keepdims=True) / m
    return cost

In [79]:
def back_prop(y, yhat, X):
    """Gradients of the mean binary cross-entropy for the logistic model.

    The chain-rule product dC/dyhat * dyhat/dz simplifies algebraically to
    ``(yhat - y) / m``. Using that form avoids dividing by ``yhat`` or
    ``1 - yhat``, which produces NaN gradients once the sigmoid saturates to
    exactly 0 or 1.

    Args:
        y: Ground-truth labels; ``y.shape[0]`` is the sample count.
        yhat: Predicted probabilities, same shape as ``y``.
        X: Input matrix with one row per sample.

    Returns:
        Dict with ``'dw'`` (``X.T @ dC/dz``) and ``'db'`` (dC/dz summed over
        samples, dimensions kept).
    """
    m = y.shape[0]
    # (-1/m) * (y/yhat - (1-y)/(1-yhat)) * yhat*(1-yhat) == (yhat - y) / m
    dc_dz = (yhat - y) / m
    dc_dw = np.matmul(X.T, dc_dz)
    dc_db = np.sum(dc_dz, axis=0, keepdims=True)
    return {'dw': dc_dw, 'db': dc_db}
    

In [80]:
def update_params(params, grads, lr=0.1):
    """Take one gradient-descent step and return the new parameters.

    Args:
        params: Dict with current ``'w'`` and ``'b'``.
        grads: Dict with the matching gradients ``'dw'`` and ``'db'``.
        lr: Step size.

    Returns:
        A new dict with ``'w'`` and ``'b'`` each moved by ``-lr * gradient``;
        the input dicts are not modified.
    """
    weights, bias = params['w'], params['b']
    grad_w, grad_b = grads['dw'], grads['db']
    return {
        'w': weights - lr * grad_w,
        'b': bias - lr * grad_b,
    }

In [86]:
def save_params(model, params):
    """Write each parameter array to ``../src/models/<model>/<name>.npy``.

    The model directory is created when missing, so saving works even if the
    directory was never created or was removed beforehand.

    Args:
        model: Model name, used as the sub-directory under ``../src/models/``.
        params: Dict mapping parameter names to arrays; one ``.npy`` file is
            written per key.
    """
    path = '../src/models/' + model
    # np.save does not create parent directories.
    os.makedirs(path, exist_ok=True)
    for name, value in params.items():
        np.save(os.path.join(path, name + '.npy'), value)

In [87]:
def load_params(model, X_train):
    """Return parameters for ``model``, loading saved values when present.

    Fresh parameters are always created first via ``initialize_params``. If
    the directory ``../src/models/<model>`` does not exist it is created and
    the fresh parameters are returned. Otherwise each parameter is replaced
    by the contents of ``<name>.npy`` in that directory; a parameter whose
    file is missing keeps its freshly initialised value.

    Args:
        model: Model name, used as the sub-directory under ``../src/models/``.
        X_train: Array passed to ``initialize_params`` to size the defaults.

    Returns:
        Dict of parameter arrays keyed by name.
    """
    params = initialize_params(X_train)
    model_dir = '../src/models/' + model

    # Unknown model: set up its directory and start from the fresh values.
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
        print(f'created new model {model}')
        return params

    for name in params:
        try:
            params[name] = np.load(model_dir + '/' + name + '.npy')
            print(f'loaded param {name} with value {params[name]}')
        except FileNotFoundError:
            print(f'couldnt find param {name}, continuing with default value')
    return params

In [91]:
def train_loop(model, epochs, X_train, y_train, lr, log_every=1):
    """Train the logistic model with batch gradient descent and save it.

    Parameters are obtained from ``load_params`` (so training continues from
    previously saved values when they exist), updated for ``epochs`` full
    passes over the training set, and then written with ``save_params``.

    Args:
        model: Model name used by ``load_params`` / ``save_params``.
        epochs: Number of gradient-descent steps.
        X_train: Training inputs.
        y_train: Training labels.
        lr: Learning rate passed to ``update_params``.
        log_every: Print the cost every ``log_every`` epochs (the last epoch
            is always printed). The default of 1 prints every epoch.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be a positive integer')

    params = load_params(model, X_train)
    last_epoch = epochs - 1
    for epoch in range(epochs):
        yhat = forward_prop(X_train, params)
        grads = back_prop(y_train, yhat, X_train)
        # The cost is only needed for reporting, so skip it on silent epochs.
        # It reflects the parameters before this epoch's update.
        if epoch % log_every == 0 or epoch == last_epoch:
            cost = calculate_cost(yhat, y_train)
            print(f'epoch {epoch} cost {cost}')
        params = update_params(params, grads, lr)
    save_params(model, params)
    print(f'saved params {params} in model {model}')

In [99]:
def predict(model, X_test, y_test):
    """Score the saved parameters of ``model`` on a labelled test set.

    Args:
        model: Model name passed to ``load_params``. Note that
            ``load_params`` creates the model directory and returns freshly
            initialised parameters when the model does not exist yet.
        X_test: Test inputs, one row per sample.
        y_test: Test labels, one per sample; reshaped to match the
            predictions.

    Returns:
        Accuracy: the fraction of samples whose prediction, thresholded at
        0.5, equals the label.

    Raises:
        ValueError: If ``y_test`` does not contain one label per prediction.
    """
    # Size the default parameters from the real inputs rather than a
    # hard-coded two-feature dummy array.
    params = load_params(model, X_test)
    yhat = forward_prop(X_test, params)
    ypred = (yhat > 0.5).astype(int)
    # Align shapes explicitly: comparing (n, 1) with (n,) would broadcast to
    # an (n, n) matrix and silently give a meaningless score.
    y_true = np.reshape(y_test, ypred.shape)
    return np.mean(ypred == y_true)

In [97]:
# Build the synthetic dataset once; the training and evaluation cells read these module-level names.
X_train, X_test, y_train, y_test = load_data()

In [116]:
# Run configuration: model name (sub-directory under '../src/models/'), epoch count and learning rate.
model = 'm3'
epochs = 100000
lr = 0.05

# train_loop starts from parameters already saved for this model when they exist,
# so re-running this cell continues training instead of starting over.
train_loop(model, epochs, X_train, y_train, lr)

loaded param w with value [[ 7.40128234]
 [-5.05611368]]
loaded param b with value [[-6.12358106]]
epoch 0 cost [[0.08701326]]
epoch 1 cost [[0.08701011]]
epoch 2 cost [[0.08700695]]
epoch 3 cost [[0.08700379]]
epoch 4 cost [[0.08700063]]
epoch 5 cost [[0.08699748]]
epoch 6 cost [[0.08699432]]
epoch 7 cost [[0.08699117]]
epoch 8 cost [[0.08698801]]
epoch 9 cost [[0.08698486]]
epoch 10 cost [[0.0869817]]
epoch 11 cost [[0.08697855]]
epoch 12 cost [[0.08697539]]
epoch 13 cost [[0.08697224]]
epoch 14 cost [[0.08696909]]
epoch 15 cost [[0.08696594]]
epoch 16 cost [[0.08696278]]
epoch 17 cost [[0.08695963]]
epoch 18 cost [[0.08695648]]
epoch 19 cost [[0.08695333]]
epoch 20 cost [[0.08695018]]
epoch 21 cost [[0.08694703]]
epoch 22 cost [[0.08694388]]
epoch 23 cost [[0.08694073]]
epoch 24 cost [[0.08693758]]
epoch 25 cost [[0.08693444]]
epoch 26 cost [[0.08693129]]
epoch 27 cost [[0.08692814]]
epoch 28 cost [[0.086925]]
epoch 29 cost [[0.08692185]]
epoch 30 cost [[0.0869187]]
epoch 31 cost [[

In [117]:
# Evaluate the parameters saved for this model on the held-out split.
model = 'm3'

# predict returns the fraction of test labels matched at a 0.5 threshold.
score = predict(model, X_test, y_test)
score

loaded param w with value [[ 17.13994289]
 [-11.72281645]]
loaded param b with value [[-14.32626272]]


0.95