In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('../input/heart-disease-prediction-using-logistic-regression/framingham.csv')
df.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [3]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def f(w, x, b):
    return sigmoid(w.dot(x) + b)


In [4]:
df = df.dropna()
npDF = df.to_numpy()

x_train = npDF[:2560, :-1]
y_train = npDF[:2560, -1]


In [5]:
def cost(w, x, y, b):
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        cost += y[i] * np.log(f(w, x[i], b)) + (
            (1 - y[i]) * (np.log(1 - f(w, x[i], b))) / 2)
    cost /= -m
    return cost


In [6]:
def compute_gradient(w, x, y, b):
    m = x.shape[0]
    n = x.shape[1]
    dj_dw = np.zeros((n, ))
    dj_db = 0.0
    for j in range(n):
        for i in range(m):
            dj_dw[j] += (f(w, x[i], b) - y[i]) * x[i][j]
        dj_dw[j] /= m

    for i in range(m):
        dj_db += f(w, x[i], b) - y[i]
    dj_db /= m

    return (dj_dw, dj_db)

In [7]:
def gradient_descent(w, x, y, b, lr):
    n = x.shape[1]
    i = 0
    while True:
        prev_cost = cost(w, x, y, b)
        dj_dw, dj_db = compute_gradient(w, x, y, b)
        for j in range(n):
            w[j] = w[j] - (lr * dj_dw[j])
        b = b - (lr * dj_db)
        new_cost = cost(w, x, y, b)
        i += 1
        print(f"Cost After epoch-{i}: {new_cost}")
        if abs(new_cost - prev_cost) < 0.000001:
            print(f'Breakpoint diff: {prev_cost - new_cost}')
            break


In [8]:
n = x_train.shape[1]
# create an np array of size m
weights = np.zeros((n, ))
bias = 0
gradient_descent(weights, x_train, y_train, bias, 0.00001)

Cost After epoch-1: 0.3650701325050403
Cost After epoch-2: 0.3492249623650292
Cost After epoch-3: 0.3427192748530961
Cost After epoch-4: 0.34084697008525133
Cost After epoch-5: 0.3412536496230746
Cost After epoch-6: 0.34274305999207766
Cost After epoch-7: 0.34469534166128735
Cost After epoch-8: 0.3467856893450638
Cost After epoch-9: 0.34884487729648417
Cost After epoch-10: 0.3507877649235916
Cost After epoch-11: 0.35257541767916484
Cost After epoch-12: 0.35419444330168764
Cost After epoch-13: 0.3556454464716077
Cost After epoch-14: 0.35693646209219615
Cost After epoch-15: 0.35807917944745676
Cost After epoch-16: 0.3590867657462819
Cost After epoch-17: 0.35997262251631273
Cost After epoch-18: 0.36074969313146765
Cost After epoch-19: 0.36143009842477103
Cost After epoch-20: 0.3620249678402531
Cost After epoch-21: 0.3625443862909531
Cost After epoch-22: 0.3629974081615342
Cost After epoch-23: 0.36339210874230704
Cost After epoch-24: 0.3637356548946231
Cost After epoch-25: 0.36403438385716

In [9]:
x_test = npDF[2560:, :-1]
y_test = npDF[2560:, -1]


In [10]:
m = x_test.shape[0]
correct = 0
for i in range(m):
    prediction = f(weights, x_test[i], bias)
    prediction = prediction >= 0.5
    if prediction == y_test[i]:
        correct += 1
print(f"Accuracy: {(correct/m)*100}%")

Accuracy: 84.85401459854015%
