In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import math

In [28]:
def gradient_descent_without_scikit(y_train_n, w_init, b_init, x_train_n, iterations, a):

    m = x_train_n.shape[0]
    dj_db_history = np.zeros(iterations)
    dj_dw_history = np.zeros((iterations, w_init.size))
    j_history = np.zeros(iterations)
    y_pred_history = np.zeros((iterations,m))
    w = w_init
    b = b_init

    y_pred = compute_y_pred(w, x_train_n, b)
    for i in range(iterations):
        dj_db, dj_dw = compute_gradient_1(y_pred, y_train_n, x_train_n)
        # j_history[i] = compute_cost(y_pred, y_train_n)
        dj_db_history[i] = dj_db
        dj_dw_history[i] = dj_dw
        b = b - a * dj_db
        w = w - a * dj_dw
        y_pred = compute_y_pred(w, x_train_n, b)
        y_pred_history[i,:] = y_pred

    return w, b, dj_dw_history, dj_db_history, j_history, y_pred_history

def compute_gradient_1(y_pred, y_train_n, x_train_n):
    m=y_pred.size
    n=x_train_n.shape[1]
    dj_dw = np.zeros(x_train_n.shape, dtype=float)
    dj_db = np.zeros(m, dtype=float)
    for i in range(m):
        dj_db[i]=(y_pred[i]-y_train_n[i])
        for j in range(n):
            dj_dw[i, j] = ((y_pred[i] - y_train_n[i]) * x_train_n[i, j])

    w_cost =np.zeros(n, dtype=float)
    for i in range(n):
        w_cost[i]=np.sum(dj_dw[:, i])

    w_cost = w_cost / m
    b_cost = np.sum(dj_db)/m
    return b_cost, w_cost

# def compute_gradient(y_pred, y_train, x_train):
#     
#     #Convert the pandas Dataframes to numpy Arrays to allow for easier manipulation. Indexing not possible in DF.  
# #     y_pred : numpy array
#     y_train_n = y_train.to_numpy(dtype=float)
#     x_train_n = x_train.to_numpy(dtype=float) 
#     
#     m=y_pred.size
#     n=x_train_n.shape[1]
#     dj_dw = np.zeros((x_train.shape), dtype=float)
#     dj_db = np.zeros(m, dtype=float)
#     for i in range(m):
#         dj_db[i]=(y_pred[i]-y_train_n[i])
#         for j in range(n): 
#             dj_dw[i, j] = ((y_pred[i] - y_train_n[i]) * x_train_n[i, j])
#     
#     w_cost =np.zeros(n, dtype=float)
#     for i in range(n):
#         w_cost[i]=np.sum(dj_dw[:, i])
# 
#     w_cost = w_cost / m
#     b_cost = np.sum(dj_db)/m
#     return b_cost, w_cost

def compute_cost(y_predicted, y_actual):
    # both numpy arrays 
    sum_array = np.zeros(y_predicted.size, dtype=float)
    for i in range(y_predicted.size):
        if y_actual[i]==1:
            sum_array[i]=-1*math.log(y_predicted[i])
        if y_actual[i]==0:
            sum_array[i]=-1*math.log(1-y_predicted[i])
            
    cost = np.sum(sum_array) / y_predicted.size
    return cost

def compute_y_pred(w, x, b):
    m = x.shape[0]
    y_pred = np.zeros(m)
    for i in range(m):
        y_pred[i] = 1 / (1 + np.exp(-1*np.dot(x[i], w)+b))
    return y_pred

In [41]:
df = pd.read_csv('datasets/framingham.csv')
df.fillna(0)

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.10,85.0,85.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4233,1,50,1.0,1,1.0,0.0,0,1,0,313.0,179.0,92.0,25.97,66.0,86.0,1
4234,1,51,3.0,1,43.0,0.0,0,0,0,207.0,126.5,80.0,19.71,65.0,68.0,0
4235,0,48,2.0,1,20.0,0.0,0,0,0,248.0,131.0,72.0,22.00,84.0,86.0,0
4236,0,44,1.0,1,15.0,0.0,0,0,0,210.0,126.5,87.0,19.16,86.0,0.0,0


In [42]:
y = df['TenYearCHD']
x = df.drop(['TenYearCHD'], axis=1)

In [43]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)


In [44]:
x_train_n = x_train.to_numpy()
x_test_n = x_test.to_numpy()
y_train_n = y_train.to_numpy()
y_test_n = y_test.to_numpy()

In [45]:
y_train_n

array([1, 0, 0, ..., 0, 0, 0], dtype=int64)

In [47]:
m = x_train_n.shape[0]
n = x_train_n.shape[1]
w = np.zeros(n)
# w = np.array([.55, 6.6, 7.8, 8.0])
b = 0.0
iterations = 100
learning_rate = .0001

w, b, dj_dw_history, dj_db_history, j_history, y_pred_history = gradient_descent_without_scikit(y_train_n, w, b, x_train_n, iterations, learning_rate)

convergence_point = j_history[-1]
gradient_w = dj_dw_history[-1]
gradient_b = dj_db_history[-1]
epsilon = j_history[-2] - j_history[-1]

print(f"the cost function decreased by {epsilon} "
      f"It should decrease by {learning_rate}")
print(f"lowest cost using gradient descent is {convergence_point}")

# expected = compute_cost(y_lr_train_pred, y_train_n)
# print(f"the expected cost using scikit LR is {expected}")

the cost function decreased by nan It should decrease by 0.0001
lowest cost using gradient descent is nan


In [40]:
m = x_train_n.shape[0]
n = x_train_n.shape[1]
w = np.zeros(n)
# w = np.array([.55, 6.6, 7.8, 8.0])
b = 0.0
y_pred = compute_y_pred(w, x_train_n, b)
x_train_n

array([[  1.  ,  51.  ,   1.  , ...,  24.1 ,  60.  ,  73.  ],
       [  0.  ,  48.  ,   1.  , ...,  22.51, 110.  ,  78.  ],
       [  0.  ,  38.  ,   2.  , ...,  24.45,  75.  ,  90.  ],
       ...,
       [  0.  ,  45.  ,   3.  , ...,  22.86,  75.  ,  92.  ],
       [  1.  ,  41.  ,   2.  , ...,  30.58,  85.  ,  65.  ],
       [  0.  ,  61.  ,   1.  , ...,  51.28,  80.  , 103.  ]])

In [35]:
a = np.array([1, 1, 1])
c = np.array([0, 1, 1])
compute_cost(a, c)

ValueError: math domain error