In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load dataset
df = pd.read_csv("heart.csv")

# Step 1: Data Cleaning
# Zeros in 'RestingBP' and 'Cholesterol' are treated as missing readings:
# convert them to NaN first, then impute with the column median.
# The zero -> NaN step must come before the median is taken; otherwise the
# zeros are never imputed (fillna only touches NaN) and they would also drag
# the median down. Series.median() skips NaN by default, so the median below
# is computed from the valid readings only.
for column in ('RestingBP', 'Cholesterol'):
    df[column] = df[column].replace(0, np.nan)
    df[column] = df[column].fillna(df[column].median())

# Step 2: One-hot encode the categorical columns.
# drop_first=True drops the first level of each column so the dummies
# are not perfectly collinear.
categorical_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Step 3: Separate the target column from the feature matrix.
y = df['HeartDisease']
X = df.drop(columns='HeartDisease')

# Step 4: Hold out 20% of the rows for testing (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Standardize features.
# The scaler is fitted on the training split only, then the same fitted
# transformation is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Fit each classifier on the training split and predict the test split.

# Gaussian Naive Bayes (fit() returns the fitted estimator itself).
nb_model = GaussianNB().fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)

# Decision tree; random_state pins the tree construction for repeatable runs.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_preds = dt_model.predict(X_test)

# Step 7: Evaluation Function
def evaluate_model(name, y_true, y_pred):
    """Print an evaluation summary for one classifier.

    Writes a header containing ``name``, a 40-character rule, and then the
    accuracy, the confusion matrix, and the classification report computed
    from ``y_true`` versus ``y_pred``. Nothing is returned.

    Args:
        name: Label shown in the header line.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
    """
    header = f"\n{name} Evaluation"
    print(header)
    print("-" * 40)

    # Each entry pairs the printed label with the metric that produces its
    # value; metrics are evaluated one at a time, in display order.
    sections = (
        ("Accuracy:", accuracy_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for label, metric in sections:
        print(label, metric(y_true, y_pred))

    

# Step 8: Evaluate Both Models
# Report each model against the same held-out labels, Naive Bayes first.
for model_name, predictions in (
    ("Naive Bayes", nb_preds),
    ("Decision Tree", dt_preds),
):
    evaluate_model(model_name, y_test, predictions)



Naive Bayes Evaluation
----------------------------------------
Accuracy: 0.8586956521739131
Confusion Matrix:
 [[68  9]
 [17 90]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.88      0.84        77
           1       0.91      0.84      0.87       107

    accuracy                           0.86       184
   macro avg       0.85      0.86      0.86       184
weighted avg       0.86      0.86      0.86       184


Decision Tree Evaluation
----------------------------------------
Accuracy: 0.8260869565217391
Confusion Matrix:
 [[62 15]
 [17 90]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.81      0.79        77
           1       0.86      0.84      0.85       107

    accuracy                           0.83       184
   macro avg       0.82      0.82      0.82       184
weighted avg       0.83      0.83      0.83       184

