In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, classification_report
import matplotlib.pyplot as plt


columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", 
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"
]
df = pd.read_csv("processed.cleveland.data", header = None, names = columns )

one_hot_encoded_cols = ['ca','restecg','cp', 'slope','thal']


df = df[~df.isin(['?']).any(axis=1)].reset_index(drop=True)


target = df['target']

df = df.drop('target',axis = 1)

selected_features = ['ca', 'cp', 'oldpeak', 'thal']
X = df[selected_features]
y= target


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42, n_estimators=100),
    "SVM": SVC(probability=True, random_state=42)
}

results = {}
roc_curves = {}

for name, model in models.items():

    if name in ["Logistic Regression", "SVM"]:
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
        y_prob = model.predict_proba(X_test_scaled)[:, 1]
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1]
    

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred,average='weighted')
    rec = recall_score(y_test, y_pred,average='weighted')
    f1 = f1_score(y_test, y_pred,average='weighted')
    #auc_ovr = roc_auc_score(y_test, y_prob, multi_class='ovr', average='weighted')
    #auc_ovo = roc_auc_score(y_test, y_prob, multi_class='ovo', average='weighted')
    
    results[name] = {
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1-score": f1,
        #"AUC One-vs-Rest:": auc_ovr,
        #"AUC One-vs-One:" : auc_ovo
    }

   # fpr, tpr, _ = roc_curve(y_test, y_prob)
   # roc_curves[name] = (fpr, tpr)


results_df = pd.DataFrame(results).T
print("\nModel Performance:\n")
print(results_df)




Model Performance:

                     Accuracy  Precision    Recall  F1-score
Logistic Regression  0.583333   0.485122  0.583333  0.527275
Decision Tree        0.483333   0.477513  0.483333  0.478333
Random Forest        0.533333   0.527222  0.533333  0.529464
SVM                  0.550000   0.451151  0.550000  0.485017


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [11]:
results_df.to_csv("evaluation_metrics", index=False)