In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


In [2]:
# Read the dataset and separate the label column ('target') from the
# remaining columns, which are all used as features.
df = pd.read_csv("../data/heart_disease_clean.csv")
y = df['target']
X = df.drop(columns=['target'])

In [3]:
# Candidate classifiers, keyed by the display name used in the results table.
# SVC needs probability=True so that predict_proba is available for ROC AUC.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(probability=True, random_state=42),
}

# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# same class proportions, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [4]:
# Scorers that compare hard class predictions against the held-out labels.
# Insertion order here determines the column order of the results table.
label_metrics = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1": f1_score,
}

results = []
for name, model in models.items():
    # Standardise the features and fit the classifier as one pipeline, so the
    # scaler is fitted on the training split only.
    pipe = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)]).fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # Second predict_proba column; presumably the positive class (label 1) —
    # confirm against the target encoding.
    y_proba = pipe.predict_proba(X_test)[:, 1]

    row = {"Model": name}
    for metric_name, scorer in label_metrics.items():
        row[metric_name] = scorer(y_test, y_pred)
    # ROC AUC is computed from the predicted probabilities, not the hard labels.
    row["ROC_AUC"] = roc_auc_score(y_test, y_proba)
    results.append(row)

# One row per model, one column per metric.
results_df = pd.DataFrame(results)
print(results_df)

                 Model  Accuracy  Precision    Recall        F1   ROC_AUC
0  Logistic Regression  0.833333   0.846154  0.785714  0.814815  0.949777
1        Decision Tree  0.683333   0.680000  0.607143  0.641509  0.678571
2        Random Forest  0.866667   0.884615  0.821429  0.851852  0.941406
3                  SVM  0.850000   0.880000  0.785714  0.830189  0.954241


In [5]:
# Persist the model comparison table as CSV (without the index column).
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first; otherwise a fresh checkout without
# ../results fails here with OSError.
results_csv_path = Path("../results/supervised_results.csv")
results_csv_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(results_csv_path, index=False)

In [6]:
# Reload the metrics table from disk, then write it out as a plain-text
# report with one section per model.
results_df = pd.read_csv("../results/supervised_results.csv")

with open("../results/evaluation_metrics.txt", "w") as report_file:
    # Report header.
    report_file.write("Supervised Learning Evaluation Metrics\n" + "=" * 50 + "\n\n")
    for _idx, row in results_df.iterrows():
        # One section per model: every metric to four decimals, then a rule.
        report_file.write(
            f"Model: {row['Model']}\n"
            f"Accuracy: {row['Accuracy']:.4f}\n"
            f"Precision: {row['Precision']:.4f}\n"
            f"Recall: {row['Recall']:.4f}\n"
            f"F1 Score: {row['F1']:.4f}\n"
            f"ROC AUC: {row['ROC_AUC']:.4f}\n"
            + "-" * 50 + "\n"
        )

print("Saved evaluation metrics to ../results/evaluation_metrics.txt")


Saved evaluation metrics to ../results/evaluation_metrics.txt
