In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from sklearn.model_selection import train_test_split

In [3]:
df=pd.read_csv('heart.csv')

In [4]:
df.corr()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
age,1.0,-0.10324,-0.071966,0.271121,0.219823,0.121243,-0.132696,-0.390227,0.088163,0.208137,-0.169105,0.271551,0.072297,-0.229324
sex,-0.10324,1.0,-0.041119,-0.078974,-0.198258,0.0272,-0.055117,-0.049365,0.139157,0.084687,-0.026666,0.111729,0.198424,-0.279501
cp,-0.071966,-0.041119,1.0,0.038177,-0.081641,0.079294,0.043581,0.306839,-0.401513,-0.174733,0.131633,-0.176206,-0.163341,0.434854
trestbps,0.271121,-0.078974,0.038177,1.0,0.127977,0.181767,-0.123794,-0.039264,0.061197,0.187434,-0.120445,0.104554,0.059276,-0.138772
chol,0.219823,-0.198258,-0.081641,0.127977,1.0,0.026917,-0.14741,-0.021772,0.067382,0.06488,-0.014248,0.074259,0.100244,-0.099966
fbs,0.121243,0.0272,0.079294,0.181767,0.026917,1.0,-0.104051,-0.008866,0.049261,0.010859,-0.061902,0.137156,-0.042177,-0.041164
restecg,-0.132696,-0.055117,0.043581,-0.123794,-0.14741,-0.104051,1.0,0.048411,-0.065606,-0.050114,0.086086,-0.078072,-0.020504,0.134468
thalach,-0.390227,-0.049365,0.306839,-0.039264,-0.021772,-0.008866,0.048411,1.0,-0.380281,-0.349796,0.395308,-0.207888,-0.098068,0.422895
exang,0.088163,0.139157,-0.401513,0.061197,0.067382,0.049261,-0.065606,-0.380281,1.0,0.310844,-0.267335,0.107849,0.197201,-0.438029
oldpeak,0.208137,0.084687,-0.174733,0.187434,0.06488,0.010859,-0.050114,-0.349796,0.310844,1.0,-0.575189,0.221816,0.202672,-0.438441


In [5]:
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [6]:
X.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2


In [7]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [8]:
def zscore_normalize_features(X):

    mu     = np.mean(X, axis=0)                 
    sigma  = np.std(X, axis=0)
    
    X_norm = (X - mu) / sigma      

    return (X_norm, mu, sigma)

In [9]:
def sigmoid(z):
   
    g=np.zeros_like(z)
    
    g=1/(1+np.exp(-z))
        
    return g

In [10]:
def compute_cost(X, y, w, b):

    m, n = X.shape

    z_wb = np.dot(X, w) + b
    f_wb = sigmoid(z_wb)

    loss = -y * np.log(f_wb) - (1 - y) * np.log(1 - f_wb)
    cost = np.mean(loss)

    return cost

In [11]:
def compute_gradient(X, y, w, b): 
  
    m, n = X.shape
    dj_dw = np.zeros(w.shape)
    dj_db = 0.

    z_wb = np.dot(X, w) + b
    f_wb = sigmoid(z_wb)

    dj_db = np.sum(f_wb - y)
    dj_dw = np.dot((f_wb - y), X) / m

    return dj_db, dj_dw

In [12]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters): 
    
    m = len(X)
    
    J_history = []
    w_history = []
    
    for i in range(num_iters):

        dj_db, dj_dw = gradient_function(X, y, w_in, b_in)   

        w_in = w_in - alpha * dj_dw               
        b_in = b_in - alpha * dj_db              
       
        if i<100000:     
            cost =  cost_function(X, y, w_in, b_in)
            J_history.append(cost)

        if i% math.ceil(num_iters/10) == 0 or i == (num_iters-1):
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")
        
    return w_in, b_in, J_history, w_history 

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
X_train_norm,mu,sigma= zscore_normalize_features(X_train)
X_test_norm,mu,sigma= zscore_normalize_features(X_test)

In [15]:
X.shape

(1025, 13)

In [16]:
y.shape

(1025,)

In [17]:
w=np.zeros(X.shape[1])
b=0

In [18]:
w.shape

(13,)

In [19]:
w,b,_,_ =gradient_descent(X_train_norm, y_train, w, b, compute_cost, compute_gradient, alpha=0.003, num_iters=10000)


Iteration    0: Cost     0.69   
Iteration 1000: Cost     0.40   
Iteration 2000: Cost     0.36   
Iteration 3000: Cost     0.35   
Iteration 4000: Cost     0.34   
Iteration 5000: Cost     0.34   
Iteration 6000: Cost     0.33   
Iteration 7000: Cost     0.33   
Iteration 8000: Cost     0.33   
Iteration 9000: Cost     0.33   
Iteration 9999: Cost     0.33   


In [20]:
def predict(X, w, b):
    
    z_wb = np.dot(X, w) + b
    
    f_wb = 1 / (1 + np.exp(-z_wb))
    
    y_pred = (f_wb >= 0.5).astype(int)
    
    return y_pred

In [21]:
y_pred = predict(X_test_norm,w,b)

In [22]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.80


In [23]:
from sklearn.metrics import confusion_matrix

conf_matrix = confusion_matrix(y_test, y_pred)

print('Confusion Matrix:')
print(conf_matrix)

Confusion Matrix:
[[70 32]
 [10 93]]
