In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
df=pd.read_csv('heart_ML.csv')

In [3]:
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [4]:
X=df.iloc[:,:-1]

In [None]:
X

In [5]:
X['ST_Slope'].unique()

array(['Up', 'Flat', 'Down'], dtype=object)

In [7]:
X['ChestPainType']=X['ChestPainType'].map({
"ATA":0,
"NAP":1,
"ASY":2,
"TA":3
})

X["Sex"]=X["Sex"].map({
"M":0,
"F":1
})

X["RestingECG"]=X["RestingECG"].map({
"Normal":0,
"ST":1,
"LVH":2
})

X["ExerciseAngina"]=X["ExerciseAngina"].map({
"N":0,
"Y":1
})

X["ST_Slope"]=X["ST_Slope"].map({
"Up":0,
"Flat":1,
"Down":2
})

In [8]:
X

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,0,0,140,289,0,0,172,0,0.0,0
1,49,1,1,160,180,0,0,156,0,1.0,1
2,37,0,0,130,283,0,1,98,0,0.0,0
3,48,1,2,138,214,0,0,108,1,1.5,1
4,54,0,1,150,195,0,0,122,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,3,110,264,0,0,132,0,1.2,1
914,68,0,2,144,193,1,0,141,0,3.4,1
915,57,0,2,130,131,0,0,115,1,1.2,1
916,57,1,0,130,236,0,2,174,0,0.0,1


In [9]:
y=df['HeartDisease']

In [10]:
print(X.isna().sum())

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
dtype: int64


In [15]:
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=40)
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

In [16]:
models={
"KNN":KNeighborsClassifier(),
"LogisticRegression": LogisticRegression(),
"DecisionTreeClassifier": DecisionTreeClassifier()
}



In [36]:
score=[]
for name, model in models.items():
    model.fit(X_train_scaled,y_train)
    y_pred=model.predict(X_test_scaled)
    print("Model: {} \n {}\n Confusion Matrix: \n{}\n".format(name,classification_report(y_test,y_pred), confusion_matrix(y_test,y_pred)))
    kf=KFold(n_splits=6,random_state=40,shuffle=True)
    results=cross_val_score(model,X_train_scaled,y_train,cv=kf, scoring="accuracy")
    score.append(np.mean(results))
    

Model: KNN 
               precision    recall  f1-score   support

           0       0.80      0.84      0.82        92
           1       0.89      0.86      0.88       138

    accuracy                           0.85       230
   macro avg       0.85      0.85      0.85       230
weighted avg       0.85      0.85      0.85       230

 Confusion Matrix: 
[[ 77  15]
 [ 19 119]]

Model: LogisticRegression 
               precision    recall  f1-score   support

           0       0.78      0.82      0.80        92
           1       0.87      0.85      0.86       138

    accuracy                           0.83       230
   macro avg       0.83      0.83      0.83       230
weighted avg       0.84      0.83      0.84       230

 Confusion Matrix: 
[[ 75  17]
 [ 21 117]]

Model: DecisionTreeClassifier 
               precision    recall  f1-score   support

           0       0.67      0.78      0.72        92
           1       0.84      0.75      0.79       138

    accuracy         

In [37]:
score

[0.8676709890668701, 0.8458555809814392, 0.8051741673023137]