In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load heart.csv (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv("heart.csv")

In [None]:
# Notebook display settings: show every column, keep wide frames on a single
# line instead of wrapping, and never truncate cell contents.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.expand_frame_repr", False),
    ("max_colwidth", None),
):
    pd.set_option(option_name, option_value)

In [None]:
# Average cholesterol computed only over rows whose reading is not 0.
ch_mean = df["Cholesterol"][df["Cholesterol"].ne(0)].mean()

In [None]:
# Swap every 0 cholesterol reading for the non-zero mean, then keep two decimals.
df["Cholesterol"] = df["Cholesterol"].replace(0, ch_mean).round(2)

In [None]:
# Same treatment as cholesterol: average the non-zero resting blood pressure
# readings, substitute that mean for every 0 entry, and keep two decimals.
resting_bp_mean = df["RestingBP"][df["RestingBP"].ne(0)].mean()
df["RestingBP"] = df["RestingBP"].replace(0, resting_bp_mean).round(2)


In [None]:
# One-hot encode the non-numeric columns. drop_first=True omits the first level of
# each categorical so the remaining dummy columns are not redundant with each other.
df_encode = pd.get_dummies(df , drop_first = True)

In [None]:
# Inspect the encoded frame to check the generated dummy columns.
df_encode

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Flat,ST_Slope_Up
0,40,140.0,289.0,0,172,0.0,0,True,True,False,False,True,False,False,False,True
1,49,160.0,180.0,0,156,1.0,1,False,False,True,False,True,False,False,True,False
2,37,130.0,283.0,0,98,0.0,0,True,True,False,False,False,True,False,False,True
3,48,138.0,214.0,0,108,1.5,1,False,False,False,False,True,False,True,True,False
4,54,150.0,195.0,0,122,0.0,0,True,False,True,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,110.0,264.0,0,132,1.2,1,True,False,False,True,True,False,False,True,False
914,68,144.0,193.0,1,141,3.4,1,True,False,False,False,True,False,False,True,False
915,57,130.0,131.0,0,115,1.2,1,True,False,False,False,True,False,True,True,False
916,57,130.0,236.0,0,174,0.0,1,False,True,False,False,False,False,False,True,False


In [None]:
# Convert only the boolean dummy columns to 0/1 integers.
# Casting the whole frame with astype(int) would also truncate genuinely
# fractional features (e.g. Oldpeak 1.5 -> 1, 3.4 -> 3) and the imputed decimal
# means, silently discarding information the models could use.
bool_cols = df_encode.select_dtypes(include="bool").columns
# A dict-based astype touches just the listed columns; when the cell is re-run
# there are no bool columns left, so the mapping is empty and nothing changes.
df_encode = df_encode.astype({col: int for col in bool_cols})

In [None]:
# List the column names available after encoding.
df_encode.columns

Index(['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak',
       'HeartDisease', 'Sex_M', 'ChestPainType_ATA', 'ChestPainType_NAP',
       'ChestPainType_TA', 'RestingECG_Normal', 'RestingECG_ST',
       'ExerciseAngina_Y', 'ST_Slope_Flat', 'ST_Slope_Up'],
      dtype='object')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score , f1_score ,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Separate the label from the predictors: y is the HeartDisease column,
# X is every other encoded column.
target_col = "HeartDisease"
y = df_encode[target_col]
X = df_encode.drop(columns=[target_col])

In [None]:
# Inspect the feature matrix (the target column has been removed).
X

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Flat,ST_Slope_Up
0,40,140,289,0,172,0,1,1,0,0,1,0,0,0,1
1,49,160,180,0,156,1,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0,1,1,0,0,0,1,0,0,1
3,48,138,214,0,108,1,0,0,0,0,1,0,1,1,0
4,54,150,195,0,122,0,1,0,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,110,264,0,132,1,1,0,0,1,1,0,0,1,0
914,68,144,193,1,141,3,1,0,0,0,1,0,0,1,0
915,57,130,131,0,115,1,1,0,0,0,1,0,1,1,0
916,57,130,236,0,174,0,0,1,0,0,0,0,0,1,0


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Echo the four split objects as a tuple for a quick visual check.
X_train, X_test, y_train, y_test

(     Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak  Sex_M  ChestPainType_ATA  ChestPainType_NAP  ChestPainType_TA  RestingECG_Normal  RestingECG_ST  ExerciseAngina_Y  ST_Slope_Flat  ST_Slope_Up
 795   42        120          240          1    194        0      1                  0                  1                 0                  1              0                 0              0            0
 25    36        130          209          0    178        0      1                  0                  1                 0                  1              0                 0              0            1
 84    56        150          213          1    125        1      1                  0                  0                 0                  1              0                 1              1            0
 10    37        130          211          0    142        0      0                  0                  1                 0                  1              0                 0         

In [None]:
# Standardising scaler; it is fitted on the training split in the next cell.
scaler = StandardScaler()


In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so the test data never influences them.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate classifiers to compare, keyed by the label used in the results table.
# The decision tree gets a fixed random_state: without it scikit-learn may break
# ties between equally good splits differently on each run, so the reported
# metrics would not be reproducible after Restart & Run All.
models = {
    "Logistic Regression" : LogisticRegression(),
    "Decision Tree Classifier" : DecisionTreeClassifier(random_state=42),
    "KNN" : KNeighborsClassifier(),
    "SVM"  :SVC(),
    "naive_bayes" : GaussianNB()
}


In [None]:
# Accumulator for one metrics dict per evaluated model.
result = []

In [None]:
# Start from an empty list inside this cell so that re-running it replaces the
# metrics instead of appending duplicate rows to a list built by an earlier run.
result = []
for name, model in models.items():
    # Train on the scaled training split and predict the scaled held-out split.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # One row per model, metrics rounded to four decimals for display.
    result.append({
        "model": name,
        "Accuracy": round(acc, 4),
        "f1_score": round(f1, 4),
    })

In [None]:
# Show the collected accuracy / F1 rows for every model.
result

[{'model': 'Logistic Regression', 'Accuracy': 0.8696, 'f1_score': 0.8846},
 {'model': 'Decision Tree Classifier', 'Accuracy': 0.7989, 'f1_score': 0.8177},
 {'model': 'KNN', 'Accuracy': 0.8641, 'f1_score': 0.8815},
 {'model': 'SVM', 'Accuracy': 0.8478, 'f1_score': 0.8667},
 {'model': 'naive_bayes', 'Accuracy': 0.8478, 'f1_score': 0.8614}]

In [None]:
# Persist what is needed to reuse the logistic regression model later:
#   - the fitted model itself (it was trained on scaled features),
#   - the fitted scaler, so new inputs can be transformed the same way,
#   - the ordered feature names, so new inputs can be aligned to the training columns.
# joblib.dump returns the list of files written, which is why the cell echoes the last one.
import joblib
joblib.dump(models["Logistic Regression"],"Logistic.pkl")
joblib.dump(scaler,"Scaler.pkl")
joblib.dump(X.columns.tolist(),"Columns.pkl")

['Columns.pkl']

In [None]:
# Feature names in the order the model was trained on (the same list saved to Columns.pkl).
X.columns.tolist()

['Age',
 'RestingBP',
 'Cholesterol',
 'FastingBS',
 'MaxHR',
 'Oldpeak',
 'Sex_M',
 'ChestPainType_ATA',
 'ChestPainType_NAP',
 'ChestPainType_TA',
 'RestingECG_Normal',
 'RestingECG_ST',
 'ExerciseAngina_Y',
 'ST_Slope_Flat',
 'ST_Slope_Up']

In [None]:
# Peek at the last ten rows of the (un-encoded) frame after imputation.
df.tail(10)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
908,63,M,ASY,140.0,187.0,0,LVH,144,Y,4.0,Up,1
909,63,F,ASY,124.0,197.0,0,Normal,136,Y,0.0,Flat,1
910,41,M,ATA,120.0,157.0,0,Normal,182,N,0.0,Up,0
911,59,M,ASY,164.0,176.0,1,LVH,90,N,1.0,Flat,1
912,57,F,ASY,140.0,241.0,0,Normal,123,Y,0.2,Flat,1
913,45,M,TA,110.0,264.0,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144.0,193.0,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130.0,131.0,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130.0,236.0,0,LVH,174,N,0.0,Flat,1
917,38,M,NAP,138.0,175.0,0,Normal,173,N,0.0,Up,0
