In [1]:
import os
import pandas as pd 
import numpy as np
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

In [2]:
df=pd.read_csv(r'C:\Heart_Disease_Prediction\Data_Sets\heart.csv')

In [3]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [4]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [5]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [6]:
X=df.drop("target",axis=1)
y=df["target"]

In [7]:
df.head(
)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [8]:
numerical_cols= ["age","trestbps","chol","thalach","oldpeak"]
categorical_cols= ["sex","cp","fbs","restecg","exang","slope","ca","thal"]

In [9]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(drop="first"), categorical_cols)
    ]
)

In [10]:
models = {
    "Logistic_Regression": LogisticRegression(),
    "Decision_Tree_Classifier": DecisionTreeClassifier(random_state=42),
    "Random_Forest_Classifier": RandomForestClassifier(n_estimators=100, random_state=42),
    "KNN_Classifier": KNeighborsClassifier(n_neighbors=3),
    "SVC": SVC(kernel="rbf")
}

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [12]:
os.makedirs("ML_Model", exist_ok=True)


In [None]:
for name, model in models.items():
    pipe = Pipeline(steps=[("preprocessor", preprocessor),
                           ("classifier", model)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    print(f"\n{name} Results:")
    print(" Accuracy:", accuracy_score(y_test, y_pred))
    print("  Precision:", precision_score(y_test, y_pred))
    print("  Recall:", recall_score(y_test, y_pred))
    print("  F1score", f1_score(y_test, y_pred))

    
    # Save model
    joblib.dump(pipe, f"ML_Model/{name}_model.pkl")
    print(f"   Saved: ML_Model/{name}_model.pkl")

nb_pipe = Pipeline(steps=[("preprocessor", preprocessor),
                          ("classifier", GaussianNB())])

nb_pipe.fit(X_train, y_train)
nb_pred = nb_pipe.predict(X_test)

print(f"\nNaive Bayes Results:")
print(" Accuracy:", accuracy_score(y_test, nb_pred))
print("  Precision:", precision_score(y_test, nb_pred))
print("  Recall:", recall_score(y_test, nb_pred))
print("  F1score", f1_score(y_test, nb_pred))

# Save Naive Bayes model separately
joblib.dump(nb_pipe, "ML_Model/Naive_Bayes_model.pkl")
print("  Saved: ML_Model/Naive_Bayes_model.pkl")


Logistic_Regression Results:
 Accuracy: 0.8195121951219512
  Precision: 0.7796610169491526
  Recall: 0.8932038834951457
  F1score 0.832579185520362
   Saved: ML_Model/Logistic_Regression_model.pkl

Decision_Tree_Classifier Results:
 Accuracy: 1.0
  Precision: 1.0
  Recall: 1.0
  F1score 1.0
   Saved: ML_Model/Decision_Tree_Classifier_model.pkl

Random_Forest_Classifier Results:
 Accuracy: 1.0
  Precision: 1.0
  Recall: 1.0
  F1score 1.0
   Saved: ML_Model/Random_Forest_Classifier_model.pkl

KNN_Classifier Results:
 Accuracy: 0.9073170731707317
  Precision: 0.9038461538461539
  Recall: 0.912621359223301
  F1score 0.9082125603864735
   Saved: ML_Model/KNN_Classifier_model.pkl

SVC Results:
 Accuracy: 0.8731707317073171
  Precision: 0.8290598290598291
  Recall: 0.941747572815534
  F1score 0.8818181818181818
   Saved: ML_Model/SVC_model.pkl

Naive Bayes Results:
 Accuracy: 0.8048780487804879
  Precision: 0.7692307692307693
  Recall: 0.8737864077669902
  F1score 0.8181818181818182
  Saved: