In [342]:
import numpy as np
import pandas as pd

In [343]:
def normalise(x, mean, std):
    """Return the standard score of ``x``.

    The value is the offset of ``x`` from ``mean`` expressed in units of
    ``std``. Works for scalars and, through broadcasting, for numpy arrays.
    """
    centred = x - mean
    return centred / std

In [344]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or numpy array
        Input value(s).

    Returns
    -------
    numpy float or array with the same shape as ``z``; values lie in [0, 1].

    Notes
    -----
    The direct form ``1 / (1 + np.exp(-z))`` overflows inside ``np.exp`` for
    large negative ``z``. ``np.logaddexp(0, -z)`` computes ``log(1 + exp(-z))``
    without forming ``exp(-z)`` on its own, so taking ``exp`` of its negation
    yields the same function with no overflow.
    """
    return np.exp(-np.logaddexp(0, -z))

In [345]:
def cost(x, y, theta):
    """Mean binary cross-entropy (log-loss) of a logistic model.

    Parameters
    ----------
    x : array, one row per sample
        Design matrix; ``x @ theta`` gives the logits.
    y : array of 0/1 labels, broadcastable against ``x @ theta``
    theta : array
        Model weights.

    Returns
    -------
    The summed per-sample loss divided by ``len(y)``.

    Notes
    -----
    The loss is computed from the logits ``z = x @ theta`` rather than from
    the probabilities: ``-log(sigmoid(z)) == logaddexp(0, -z)`` and
    ``-log(1 - sigmoid(z)) == logaddexp(0, z)``. Going through
    ``log(sigmoid(z))`` instead produces ``0 * log(0) = nan`` as soon as a
    prediction saturates to exactly 0 or 1.
    """
    z = x @ theta
    error = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
    return error.sum() / len(y)

In [346]:
def cost_gradient(x, y, theta, alpha):
    """Return the gradient-descent step for the logistic log-loss.

    The result is already scaled by ``alpha``: it is ``alpha`` times
    ``x.T @ (sigmoid(x @ theta) - y)`` divided by ``len(y)``, i.e. the
    amount the caller subtracts from ``theta``.
    """
    residual = sigmoid(x @ theta) - y
    scaled = alpha * (x.T @ residual)
    return scaled / len(y)

In [347]:
def train(x, y, theta, epochs, alpha):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    x, y : arrays
        Training design matrix and labels, passed through to
        ``cost_gradient`` and ``cost``.
    theta : array-like
        Initial weights. The caller's object is NOT modified.
    epochs : int
        Number of full-batch update steps.
    alpha : float
        Learning rate forwarded to ``cost_gradient``.

    Returns
    -------
    A new float array holding the weights after ``epochs`` updates.

    The cost after every update is printed.
    """
    # Work on a float copy: the in-place update below would otherwise
    # overwrite the caller's array, and it would fail outright on an
    # integer-typed initial guess.
    theta = np.array(theta, dtype=float)
    for i in range(epochs):
        theta -= cost_gradient(x, y, theta, alpha)
        print("epoch " + str(i+1) + ": ", cost(x, y, theta))
    return theta

In [348]:
def predict(x, theta):
    """Return ``sigmoid(x @ theta)`` for the rows of ``x``."""
    logits = x @ theta
    return sigmoid(logits)

In [349]:
# header=None: the first line of the file is read as data, not column names.
df = pd.read_csv("diabetes.csv", header=None)
df.shape

(768, 9)

In [360]:
# Every column but the last is a feature; the last column is the label.
# Forcing a float dtype guarantees the standardised values are never
# truncated, whatever dtypes pandas inferred for the columns.
x = df.iloc[:, :-1].to_numpy(dtype=float)
Y = df.iloc[:, -1].to_numpy()

# Standardise each feature column (zero mean, unit population std) in one
# vectorised step instead of a per-element Python loop.
# NOTE(review): the statistics are computed over all rows, including those
# later held out as the test split - consider fitting them on the training
# rows only.
x = normalise(x, x.mean(axis=0), x.std(axis=0))

# Prepend a column of ones so theta[0] acts as the intercept.
X = np.ones(shape=(x.shape[0], x.shape[1] + 1))
X[:, 1:] = x

# Column vector of labels; -1 lets numpy infer the row count from the data.
Y = Y.reshape((-1, 1))
X

array([[ 1.        ,  0.63994726,  0.84832379, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [ 1.        , -0.84488505, -1.12339636, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.        ,  1.23388019,  1.94372388, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 1.        ,  0.3429808 ,  0.00330087, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [ 1.        , -0.84488505,  0.1597866 , ..., -0.24020459,
        -0.37110101,  1.17073215],
       [ 1.        , -0.84488505, -0.8730192 , ..., -0.20212881,
        -0.47378505, -0.87137393]])

In [361]:
# Rows 0-599 are used for training, the remainder for testing (no shuffling).
x_train, x_test = X[:600], X[600:]
y_train, y_test = Y[:600], Y[600:]

In [362]:
# Gradient-descent settings: number of full-batch updates and step size.
epochs, l_rate = 1000, 0.001
# One weight per column of X, all starting at zero.
theta = np.zeros((X.shape[1], 1))

In [363]:
 # Run gradient descent on the training split; train() prints the cost per epoch.
 theta = train(x_train, y_train, theta, epochs=epochs, alpha=l_rate)

epoch 1:  0.6930159079360078
epoch 2:  0.6928847476988411
epoch 3:  0.6927536997421894
epoch 4:  0.6926227639598653
epoch 5:  0.6924919402457504
epoch 6:  0.6923612284937948
epoch 7:  0.6922306285980172
epoch 8:  0.692100140452506
epoch 9:  0.6919697639514181
epoch 10:  0.6918394989889793
epoch 11:  0.6917093454594856
epoch 12:  0.6915793032573013
epoch 13:  0.6914493722768604
epoch 14:  0.6913195524126666
epoch 15:  0.691189843559293
epoch 16:  0.6910602456113824
epoch 17:  0.690930758463647
epoch 18:  0.6908013820108695
epoch 19:  0.6906721161479017
epoch 20:  0.690542960769666
epoch 21:  0.6904139157711549
epoch 22:  0.6902849810474302
epoch 23:  0.6901561564936252
epoch 24:  0.6900274420049421
epoch 25:  0.6898988374766549
epoch 26:  0.689770342804107
epoch 27:  0.6896419578827129
epoch 28:  0.6895136826079576
epoch 29:  0.6893855168753968
epoch 30:  0.6892574605806568
epoch 31:  0.6891295136194354
epoch 32:  0.6890016758875004
epoch 33:  0.6888739472806913
epoch 34:  0.68874632769

epoch 433:  0.6454961062500385
epoch 434:  0.6454045582299267
epoch 435:  0.6453130836161467
epoch 436:  0.6452216823352626
epoch 437:  0.6451303543139119
epoch 438:  0.6450390994788058
epoch 439:  0.6449479177567286
epoch 440:  0.6448568090745387
epoch 441:  0.6447657733591677
epoch 442:  0.6446748105376205
epoch 443:  0.6445839205369758
epoch 444:  0.6444931032843849
epoch 445:  0.6444023587070731
epoch 446:  0.6443116867323387
epoch 447:  0.6442210872875531
epoch 448:  0.6441305603001608
epoch 449:  0.6440401056976801
epoch 450:  0.6439497234077013
epoch 451:  0.6438594133578885
epoch 452:  0.6437691754759789
epoch 453:  0.6436790096897821
epoch 454:  0.6435889159271808
epoch 455:  0.6434988941161311
epoch 456:  0.6434089441846612
epoch 457:  0.6433190660608726
epoch 458:  0.6432292596729392
epoch 459:  0.643139524949108
epoch 460:  0.6430498618176981
epoch 461:  0.6429602702071018
epoch 462:  0.6428707500457836
epoch 463:  0.6427813012622808
epoch 464:  0.6426919237852031
epoch 465

In [364]:
# Show the weight vector returned by train(); its first entry multiplies the
# column of ones in X.
theta

array([[-0.13535159],
       [ 0.080401  ],
       [ 0.18436794],
       [ 0.01724995],
       [ 0.02206971],
       [ 0.04882987],
       [ 0.1269515 ],
       [ 0.06740742],
       [ 0.0763174 ]])

In [379]:
# Model outputs for the held-out rows, displayed next to the cost on the
# training split.
y_predict = predict(x=x_test, theta=theta)
y_predict, cost(x=x_train, y=y_train, theta=theta)

(array([[0.38929706],
        [0.35974568],
        [0.41150969],
        [0.5935001 ],
        [0.5024857 ],
        [0.44497098],
        [0.61955077],
        [0.34539208],
        [0.53555043],
        [0.37823062],
        [0.41693303],
        [0.5618412 ],
        [0.63845745],
        [0.46367276],
        [0.59041277],
        [0.37862619],
        [0.42975502],
        [0.30349167],
        [0.53283041],
        [0.38159525],
        [0.46786437],
        [0.42934143],
        [0.65791848],
        [0.43114773],
        [0.38149013],
        [0.44390755],
        [0.37335847],
        [0.42399605],
        [0.48308129],
        [0.35772954],
        [0.45444269],
        [0.40994347],
        [0.37919809],
        [0.41825119],
        [0.40167127],
        [0.48570435],
        [0.42955285],
        [0.40743648],
        [0.51139433],
        [0.34373785],
        [0.41245758],
        [0.44785852],
        [0.50575604],
        [0.38117263],
        [0.4326021 ],
        [0

In [380]:
# Turn the model outputs into hard 0/1 labels with a 0.5 threshold.
y_predict_new = np.where(y_predict >= 0.5, 1, 0)
# Count the matches with one vectorised comparison. Both sides are flattened
# so the comparison stays element-wise even if one of them is a column vector
# and the other is 1-D.
cnt = int(np.sum(np.ravel(y_predict_new) == np.ravel(y_test)))
# Divide by the actual test-set size rather than a hard-coded row count.
print(cnt * 100 / len(y_test)) # Accuracy

77.97619047619048
