In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [4]:
df = pd.read_csv("Heart_Disease_Prediction.csv")
df.columns = df.columns.str.strip() 

In [12]:
df

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence


In [14]:
X = df.drop('Heart Disease', axis=1)
y = df['Heart Disease']

In [22]:
#encode categorial columns
X = pd.get_dummies(X, drop_first=True)

In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [24]:
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(),
    "K-NN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}

In [32]:
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append({'Model': name, 'Accuracy': acc})
    print(f"-> {name}")
    print("Accuracy:", acc)
    print(classification_report(y_test, y_pred))
    print("-" * 50)

-> Logistic Regression
Accuracy: 0.9074074074074074
              precision    recall  f1-score   support

     Absence       0.91      0.94      0.93        33
    Presence       0.90      0.86      0.88        21

    accuracy                           0.91        54
   macro avg       0.91      0.90      0.90        54
weighted avg       0.91      0.91      0.91        54

--------------------------------------------------
-> SVM
Accuracy: 0.8888888888888888
              precision    recall  f1-score   support

     Absence       0.89      0.94      0.91        33
    Presence       0.89      0.81      0.85        21

    accuracy                           0.89        54
   macro avg       0.89      0.87      0.88        54
weighted avg       0.89      0.89      0.89        54

--------------------------------------------------
-> K-NN
Accuracy: 0.8148148148148148
              precision    recall  f1-score   support

     Absence       0.81      0.91      0.86        33
    Presen

In [36]:
df_results = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)
print("Model Ranking (Best to Worst):")
print(df_results)

Model Ranking (Best to Worst):
                 Model  Accuracy
0  Logistic Regression  0.907407
3          Naive Bayes  0.907407
1                  SVM  0.888889
5        Random Forest  0.888889
2                 K-NN  0.814815
4        Decision Tree  0.685185
