In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [10]:
data = pd.read_csv('heart.csv')
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def calculate_gradient(theta, X, y):
    m = y.size
    return (X.T @ (sigmoid(X @ theta) - y)) / m


def gradient_descent(X, y, alpha = 0.1, num_iter = 100, tol = 1e-7):
    
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    
    theta = np.zeros(X_b.shape[1])
    
    for i in range(num_iter):
        
        grad = calculate_gradient(theta, X_b, y)
        theta -= alpha * grad
        
        if np.linalg.norm(grad) < tol:
            break
    return theta

def predict_prob(X, theta):
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    return sigmoid(X_b @ theta)

def predict(X, theta, threshold=0.5):
    return (predict_prob(X, theta) >= threshold).astype(int)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data.dropna(inplace=True)


X = data.drop("TenYearCHD", axis=1)
y = data["TenYearCHD"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

theta_hat = gradient_descent(X_train_scaled, y_train, alpha=0.1)

y_pred_train = predict(X_train_scaled, theta_hat)
y_pred_test = predict(X_test_scaled, theta_hat)

train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)

print(train_acc)
print(test_acc)

0.8587551299589603
0.8333333333333334


In [16]:
person = {
    "male": 1,
    "age": 58,
    "education": 2,
    "currentSmoker": 1,
    "cigsPerDay": 20,
    "BPMeds": 0,
    "prevalentStroke": 0,
    "prevalentHyp": 1,
    "diabetes": 0,
    "totChol": 250,
    "sysBP": 145,
    "diaBP": 90,
    "BMI": 27.5,
    "heartRate": 85,
    "glucose": 95
}


df = pd.DataFrame([person])

person_scaled = scaler.transform(df)

prob = predict_prob(person_scaled, theta_hat)[0]
prediction = predict(person_scaled, theta_hat)[0]

print(f"Probability : {prob:.2f}")
print("Prediction : ", "At Risk" if prediction == 1 else "Not at risk")

Probability : 0.38
Prediction :  Not at risk


In [17]:
from sklearn.metrics import classification_report, confusion_matrix

# After predictions:
y_pred_test = predict(X_test_scaled, theta_hat)

# Classification report
print(classification_report(y_test, y_pred_test, target_names=["No CHD", "CHD"]))

# Optional: Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))


              precision    recall  f1-score   support

      No CHD       0.84      0.99      0.91       611
         CHD       0.47      0.06      0.10       121

    accuracy                           0.83       732
   macro avg       0.65      0.52      0.51       732
weighted avg       0.78      0.83      0.78       732

Confusion Matrix:
[[603   8]
 [114   7]]


In [18]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(class_weight='balanced', max_iter=1000)
clf.fit(X_train_scaled, y_train)
y_pred_test_sk = clf.predict(X_test_scaled)

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_test_sk, target_names=["No CHD", "CHD"]))


              precision    recall  f1-score   support

      No CHD       0.90      0.67      0.77       611
         CHD       0.27      0.63      0.38       121

    accuracy                           0.66       732
   macro avg       0.59      0.65      0.57       732
weighted avg       0.80      0.66      0.70       732

