<a href="https://colab.research.google.com/github/Sanchita210507/BML-Experiments/blob/main/Exp_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Mount Google Drive so the dataset file below is reachable from the runtime.
drive.mount('/content/drive')

path = '/content/drive/MyDrive/heart_disease/processed.cleveland.data'

# Load dataset without header
df = pd.read_csv(path, header=None)

# Assign official UCI column names
df.columns = [
    "age","sex","cp","trestbps","chol",
    "fbs","restecg","thalach","exang","oldpeak",
    "slope","ca","thal","num"
]

# "?" marks a missing value in the file: turn it into NaN, make every column
# numeric, then drop the rows that still contain a missing value.
df = (
    df.replace("?", np.nan)
      .apply(pd.to_numeric)
      .dropna()
)

# Convert target to binary (0 vs >=1)
df["num"] = (df["num"] > 0).astype(int)

print(df.head())
print(df.shape)

# Features and target (target kept as a column vector of shape (m, 1))
X_raw = df.drop("num", axis=1).values
y = df["num"].values.reshape(-1, 1)

# train-test split
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/std applied to the training rows (data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Scale features: statistics are learned from the training rows only and then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training statistics, kept under the
# original name `X`.
X = scaler.transform(X_raw)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5   
2  67.0  1.0  4.0     120.0  229.0  0.0      2.0    129.0    1.0      2.6   
3  37.0  1.0  3.0     130.0  250.0  0.0      0.0    187.0    0.0      3.5   
4  41.0  0.0  2.0     130.0  204.0  0.0      2.0    172.0    0.0      1.4   

   slope   ca  thal  num  
0    3.0  0.0   6.0    0  
1    2.0  3.0   3.0    1  
2    2.0  2.0   7.0    1  
3    3.0  0.0   3.0    0  
4    1.0  0.0   3.0    0  
(297, 14)


*Manual Implementation: logistic regression trained from scratch with NumPy (batch gradient descent)*

In [None]:
import numpy as np

# Sigmoid function
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s); converted to a float NumPy array internally.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z`` with the same shape as the input
        (a NumPy scalar when ``z`` is a scalar).

    Notes
    -----
    The direct formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Here the exponential is only taken of ``-|z|`` (never
    positive), and the algebraically equivalent form ``e / (1 + e)`` is used
    for negative inputs.
    """
    z = np.asarray(z, dtype=float)
    # exp of a non-positive number is always in (0, 1], so it cannot overflow.
    e = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out

# Training Logistic Regression from scratch
def train_logistic_regression(X, y, lr=0.01, epochs=2000):
    """Fit a binary logistic-regression model with batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix with one row per sample.
    y : array-like, shape (m,) or (m, 1)
        Binary labels (0 or 1). Reshaped internally to a column vector.
    lr : float, default 0.01
        Gradient-descent step size.
    epochs : int, default 2000
        Number of full passes over the data.

    Returns
    -------
    W : numpy.ndarray, shape (n, 1)
        Learned weights.
    b : float
        Learned bias.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.

    Notes
    -----
    The binary cross-entropy loss is printed every 200 epochs.
    """
    X = np.asarray(X, dtype=float)
    # Force labels into shape (m, 1). A 1-D y of shape (m,) would broadcast
    # against the (m, 1) predictions into an (m, m) matrix instead of an
    # element-wise difference.
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    m, n = X.shape
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} labels"
        )

    W = np.zeros((n, 1))
    b = 0.0

    for i in range(epochs):
        # Forward propagation
        y_pred = sigmoid(np.dot(X, W) + b)

        # Print loss occasionally; the cost is only needed for this report,
        # so it is computed only on the epochs where it is shown.
        if i % 200 == 0:
            # Binary cross entropy; 1e-9 keeps log() away from log(0).
            cost = -(1/m) * np.sum(y * np.log(y_pred + 1e-9) + (1-y)*np.log(1-y_pred + 1e-9))
            print(f"Epoch {i}, Loss = {cost:.4f}")

        # Gradients of the mean cross-entropy w.r.t. W and b
        error = y_pred - y
        dW = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)

        # Update weights
        W -= lr * dW
        b -= lr * db

    return W, b

# Fit the from-scratch model on the training split, using the function's
# default learning rate and epoch count.
W, b = train_logistic_regression(X=X_train, y=y_train)

# Prediction
def predict(X, W, b, threshold=0.5):
    """Predict binary class labels with a trained logistic-regression model.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix with one row per sample.
    W : numpy.ndarray, shape (n, 1)
        Weight column vector.
    b : float
        Bias term.
    threshold : float, default 0.5
        Probability cutoff; samples whose predicted probability is greater
        than or equal to this value are labelled 1, the rest 0.

    Returns
    -------
    numpy.ndarray of int
        Predicted labels (0 or 1), one row per sample, with the shape of
        ``np.dot(X, W)``.
    """
    probabilities = sigmoid(np.dot(X, W) + b)
    return (probabilities >= threshold).astype(int)

# Score the hand-written model on the held-out split: accuracy is the
# fraction of test samples whose predicted label equals the true label.
y_pred_test = predict(X_test, W, b)
accuracy = (y_pred_test == y_test).mean()
print("Manual Logistic Regression Accuracy:", accuracy)


Epoch 0, Loss = 0.6931
Epoch 200, Loss = 0.4442
Epoch 400, Loss = 0.4012
Epoch 600, Loss = 0.3850
Epoch 800, Loss = 0.3768
Epoch 1000, Loss = 0.3718
Epoch 1200, Loss = 0.3685
Epoch 1400, Loss = 0.3662
Epoch 1600, Loss = 0.3645
Epoch 1800, Loss = 0.3632
Manual Logistic Regression Accuracy: 0.9


*Built-in Method: scikit-learn's `LogisticRegression`*

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Build the estimator and fit it in one step (fit() returns the estimator).
# The labels are flattened to 1-D with ravel() before being passed to fit().
model = LogisticRegression(max_iter=2000).fit(X_train, y_train.ravel())

# Predicted labels for the held-out split
y_pred = model.predict(X_test)

# Overall accuracy followed by the per-class precision / recall / F1 table
print("Sklearn Logistic Regression Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Sklearn Logistic Regression Accuracy: 0.8666666666666667
              precision    recall  f1-score   support

           0       0.89      0.89      0.89        36
           1       0.83      0.83      0.83        24

    accuracy                           0.87        60
   macro avg       0.86      0.86      0.86        60
weighted avg       0.87      0.87      0.87        60

