In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Heart-disease dataset, read as CSV directly from its GitHub location.
url = "https://raw.githubusercontent.com/anikannal/heart-disease-prediction/master/heart.csv"
data = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to sanity-check the load.
data.head(n=5)

In [None]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# columns describe() summarises by default.
data.describe()

In [None]:
# Count the missing entries in every column.
data.isna().sum()

In [None]:
# Impute missing values with the median of each numeric column.
# - numeric_only=True keeps this cell working if a non-numeric column is ever
#   present (DataFrame.median raises on such columns in pandas >= 2.0).
# - Reassigning instead of inplace=True avoids mutating the frame in place.
# NOTE(review): the medians are computed on the full dataset, before the
# train/test split later in the notebook, so test rows influence the imputed
# values. If the data actually contains NaNs, impute after splitting using
# training-set medians only.
data = data.fillna(data.median(numeric_only=True))

In [None]:
# Separate the label column from the predictor columns.
y = data["target"]
X = data.drop(columns=["target"])

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions similar in both partitions; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the per-feature scaling statistics from the training rows only,
# then apply that same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)

X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit a logistic-regression classifier on the scaled training data;
# max_iter=1000 raises the solver's iteration cap.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
# Keep the fitted estimator as the cell's last expression so it is displayed.
model

In [None]:
# Predicted probability taken from column index 1 of predict_proba
# (used later as the score for the ROC metrics).
y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
# Hard class labels for the held-out rows.
y_pred = model.predict(X_test_scaled)

In [None]:
# Tabulate actual vs. predicted labels on the test set.
cm = confusion_matrix(y_test, y_pred)

# Draw the counts as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set(xlabel="Predicted", ylabel="Actual", title="Confusion Matrix")
plt.show()

In [None]:
# Area under the ROC curve, computed from the predicted probabilities
# rather than the hard labels.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)
roc_auc

In [None]:
# False/true positive rates at each decision threshold.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"ROC-AUC = {roc_auc:.2f}")
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], linestyle="--")
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve",
)
ax.legend()
plt.show()

In [None]:
# 5-fold cross-validated ROC-AUC on the training partition.
# The scaler is placed inside a pipeline and the *unscaled* X_train is passed,
# so each fold fits StandardScaler on its own training portion only. Scoring
# X_train_scaled directly would let every validation fold influence the
# scaling statistics (preprocessing leakage) and bias the estimate upward.
# As with passing `model` directly, cross_val_score fits clones of the
# estimator, so the already-fitted `model` is not refit here; wrapping it
# keeps the CV hyperparameters in sync with the model cell.
cv_pipeline = make_pipeline(StandardScaler(), model)
cv_scores = cross_val_score(
    cv_pipeline, X_train, y_train, cv=5, scoring="roc_auc"
)

print("Cross-Validation ROC-AUC Scores:", cv_scores)
print("Mean ROC-AUC:", cv_scores.mean())

In [None]:
# Text report of per-class precision, recall, F1 and support on the test set.
print(classification_report(y_true=y_test, y_pred=y_pred))