In [37]:
import pandas as pd
import numpy as np

In [40]:
df = pd.read_csv('Data/Heart.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,1,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,fixed,No
1,2,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,normal,Yes
2,3,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,reversable,Yes
3,4,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,normal,No
4,5,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,normal,No


In [145]:
def hypothesis(X, theta):
    z = np.dot(theta, X.T)
    return 1 / (1 + np.exp(-z)) - 0.0000001

In [146]:
def cost(X, y, theta):
    m = X.shape[0]
    h = hypothesis(X, theta)
    J = (1/m) * (-y.T * np.log(h) - (1-y).T * np.log(1-h))
    return J

In [147]:
def gradient_descent(X, y, theta, alpha, epochs):
    m = X.shape[0]
    J = [cost(X, y, theta)]
    for i in range(0, epochs):
        h = hypothesis(X, theta)
        for i in range(0, X.shape[1]):
            theta[i] -= (alpha/m) * np.sum((h - y)*X[:, i])
        J.append(cost(X, y, theta))
    return theta

In [148]:
def predict(X, y, theta, alpha, epochs):
    th = gradient_descent(X, y, theta, alpha, epochs)
    h = hypothesis(X, theta)
    for i in range(len(h)):
        h[i] = 1 if h[i] >= 0.5 else 0
    y_lst = list(y)
    acc = np.sum([y[i] == h[i] for i in range(len(y))]) / len(y)
    return acc

In [47]:
#Convert categorical features to numerical data 
df['ChestPainx'] = df.ChestPain.replace({'typical' : 1, "asymptomatic": 2, "nonanginal": 3, "nontypical": 4})
df['Thal'] = df.Thal.replace({'fixed': 1, 'normal': 2, 'reversable' : 3})
df['AHD'] = df.AHD.replace({'Yes': 1, 'No': 0})

In [48]:
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD,ChestPainx
0,1,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,1.0,0,1
1,2,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,2.0,1,2
2,3,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,3.0,1,2
3,4,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,2.0,0,3
4,5,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,2.0,0,4


In [50]:
df = pd.concat([pd.Series(1, index = df.index, name = '00'), df], axis=1)

In [51]:
df.head()

Unnamed: 0.1,00,Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD,ChestPainx
0,1,1,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,1.0,0,1
1,1,2,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,2.0,1,2
2,1,3,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,3.0,1,2
3,1,4,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,2.0,0,3
4,1,5,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,2.0,0,4


In [91]:
X = df.drop(columns=['Unnamed: 0', 'ChestPain', 'Thal']).to_numpy()
y = df['AHD']

In [149]:
theta = np.zeros(X.shape[1])
acc = predict(X, y, theta, 0.0001, 25000)
acc

0.8217821782178217

## Vectorized

In [254]:
X_n = df.drop(columns=['Unnamed: 0', 'ChestPain', 'Thal']).to_numpy()
np.reshape(X_n, (303, 14))
y_n = df['AHD']

ValueError: cannot reshape array of size 4238 into shape (303,14)

In [250]:
def sigmoid_vec(z):
    return 1 / (1 + np.exp(-z))

In [251]:
def cost_vec(X_n, y_n, theta_n):
    m = y_n.size
    h = sigmoid(np.dot(X_n, theta_n))
    print(h)
    J = (1/m) * np.dot(-y, np.log(h)) - np.dot((1-y), np.log(1-h))
    print(J)
    grad = (1/m) * np.dot(h-y, X_n)
    print(grad)
    return J, grad

In [252]:
# Initialize fitting parameters
n = X_n.shape[1]
initial_theta = np.zeros(n)

cost, grad = cost_vec(X_n, y_n, initial_theta)
print('Cost at initial theta (zeros): {:.3f}'.format(cost))

print('Gradient at initial theta (zeros):')
print('\t[{:.4f}, {:.4f}, {:.4f}]'.format(*grad))

IndexError: tuple index out of range