In [None]:
# Cell 1: Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Cell 2: Load + preprocess
# Read the raw table, drop the bookkeeping columns when they are present, and
# collapse the `num` column into a binary label: 1 when num > 0, else 0.
df = pd.read_csv("../data/heart_disease_uci.csv")
df = df.drop(columns=["id", "dataset"], errors="ignore")
df["target"] = (df["num"] > 0).astype(int)
df = df.drop(columns=["num"])

# One-hot encode the non-numeric columns. Missing values are deliberately NOT
# filled here: imputation statistics must come from the training rows only.
X = pd.get_dummies(df.drop(columns=["target"]), drop_first=True)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Impute missing values with the per-column median of the training split.
# Filling with a literal 0 (the previous behaviour) injects values that are
# not plausible measurements and distorts every model trained on them; taking
# the medians from X_train alone keeps test-set information out of the fit.
missing_cols = X.columns[X.isna().any()]
train_medians = X_train[missing_cols].median()

# The trailing fillna(0) is only a fallback for a column that has no observed
# value at all in the training split (its median would itself be NaN).
X_train = X_train.fillna(train_medians).fillna(0)
X_test = X_test.fillna(train_medians).fillna(0)
# Keep the full matrix NaN-free as well, as it was before this change.
X = X.fillna(train_medians).fillna(0)

# Cell 3: Train models
# Fit each classifier on the training split, score it on the held-out split,
# and collect accuracy / precision / recall / F1 into one comparison table.

# Shared seed for the estimators that draw random numbers while fitting, so
# re-running this cell reproduces the same metrics table.
RANDOM_STATE = 42

models = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "SVM": SVC(),
}
# NOTE(review): Logistic Regression and SVM are fitted on unscaled features.
# Both are scale-sensitive; consider wrapping them in a StandardScaler
# pipeline. Not done here because it would change the objects in `models`.

results = []
for name, clf in models.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    results.append([
        name,
        accuracy_score(y_test, preds),
        # zero_division=0 keeps the score at 0 (without a warning) when a
        # model predicts no positive cases at all.
        precision_score(y_test, preds, zero_division=0),
        recall_score(y_test, preds, zero_division=0),
        f1_score(y_test, preds, zero_division=0),
    ])

# One row per model, in the insertion order of `models`.
results_df = pd.DataFrame(results, columns=["Model","Accuracy","Precision","Recall","F1"])
print(results_df)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1
0,Logistic Regression,0.831522,0.838095,0.862745,0.850242
1,Decision Tree,0.782609,0.792453,0.823529,0.807692
2,Random Forest,0.831522,0.831776,0.872549,0.851675
3,SVM,0.744565,0.747748,0.813725,0.779343
