In [42]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB  
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier

In [43]:
# Load the raw records from heart.csv (resolved relative to the working directory).
dataset = pd.read_csv(filepath_or_buffer='heart.csv')

In [44]:
# Per-column count of missing values; all zeros means no imputation is needed.
dataset.isna().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [45]:
# Preview the first five raw rows.
dataset.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


## Encoding

In [46]:
# The string-valued columns that will be one-hot encoded in the next cell.
attributes = [
    dataset['Sex'],
    dataset['ChestPainType'],
    dataset['RestingECG'],
    dataset['ExerciseAngina'],
    dataset['ST_Slope'],
]

In [47]:
# One-hot encode every categorical column and append the indicator columns
# to the right of the existing frame, in the same order as `attributes`.
# The new columns are named after the bare category values (no prefix),
# e.g. 'F'/'M' or 'N'/'Y'.
# NOTE: re-running this cell appends the same indicator columns a second time.
dataset = pd.concat(
    [dataset, *(pd.get_dummies(column) for column in attributes)],
    axis='columns',
)

In [48]:
# Preview the frame with the indicator columns appended.
dataset.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,...,NAP,TA,LVH,Normal,ST,N,Y,Down,Flat,Up
0,40,M,ATA,140,289,0,Normal,172,N,0.0,...,0,0,0,1,0,1,0,0,0,1
1,49,F,NAP,160,180,0,Normal,156,N,1.0,...,1,0,0,1,0,1,0,0,1,0
2,37,M,ATA,130,283,0,ST,98,N,0.0,...,0,0,0,0,1,1,0,0,0,1
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,...,0,0,0,1,0,0,1,0,1,0
4,54,M,NAP,150,195,0,Normal,122,N,0.0,...,1,0,0,1,0,1,0,0,0,1


In [49]:
# The original string columns are redundant once their indicator columns exist.
dataset = dataset.drop(
    labels=['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'],
    axis='columns',
)

In [50]:
# Preview the fully numeric frame.
dataset.head(n=5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,F,M,ASY,...,NAP,TA,LVH,Normal,ST,N,Y,Down,Flat,Up
0,40,140,289,0,172,0.0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0.0,0,0,1,0,...,0,0,0,0,1,1,0,0,0,1
3,48,138,214,0,108,1.5,1,1,0,1,...,0,0,0,1,0,0,1,0,1,0
4,54,150,195,0,122,0.0,0,0,1,0,...,1,0,0,1,0,1,0,0,0,1


## Splitting the Features and the Target

In [51]:
# Target: the HeartDisease label column. Features: every remaining column.
Y = dataset['HeartDisease']
X = dataset.drop(columns=['HeartDisease'])

## Standardizing the Data

In [52]:
# Scaler that centers each feature to zero mean and scales it to unit variance.
# Kept in the global `S` because the prediction cell reuses it on new input.
S = StandardScaler(with_mean=True, with_std=True)

In [53]:
# Learn the per-feature scaling statistics and apply them; X becomes a plain
# array of standardized values.
# NOTE(review): the scaler is fitted on ALL rows, before the train/test split
# further down, so statistics from the test rows leak into the features.
# Consider splitting first and fitting S on the training rows only.
# NOTE: this cell overwrites its own input (X = f(X)); running it twice
# re-fits S on already-scaled data.
S.fit(X)
X = S.transform(X)

In [54]:
# `dataset` is not modified by the scaling step (only X was reassigned),
# so this still shows the unscaled values.
dataset.head(n=5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,F,M,ASY,...,NAP,TA,LVH,Normal,ST,N,Y,Down,Flat,Up
0,40,140,289,0,172,0.0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0.0,0,0,1,0,...,0,0,0,0,1,1,0,0,0,1
3,48,138,214,0,108,1.5,1,1,0,1,...,0,0,0,1,0,0,1,0,1,0
4,54,150,195,0,122,0.0,0,0,1,0,...,1,0,0,1,0,1,0,0,0,1


In [55]:
# Persist the encoded (unscaled) frame.
# NOTE(review): the row index is written as an extra leading column by default;
# pass index=False if consumers of this file do not expect it — confirm.
dataset.to_csv(path_or_buf='updated_heart.csv')

## Train Test Split

In [56]:
# Hold out half of the rows for testing. Stratifying on Y keeps the class
# ratio the same in both halves, and the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.5,
    random_state=0,
    stratify=Y,
)

## Decision Tree Classifier

In [57]:
# Decision tree with default hyper-parameters.
# The seed is fixed because tree fitting randomly permutes features at each
# split; without it the reported metrics change from run to run. The value 0
# matches the seed used for the train/test split.
dt_classifier = DecisionTreeClassifier(random_state=0)

In [58]:
# Train the decision tree on the training split, then score it on the held-out split.
y_pred = dt_classifier.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)
    

 Accuracy = 0.7625  Precision = 0.7984   Recall = 0.7638  F1 score = 0.7807 


## Support Vector Machine

In [59]:
# Support vector classifier with a linear kernel.
svm_classifier = svm.SVC(kernel="linear")

In [60]:
# Train the linear SVM on the training split, then score it on the held-out split.
y_pred = svm_classifier.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)
    

 Accuracy = 0.8736  Precision = 0.8920   Recall = 0.8780  F1 score = 0.8849 


## Logistic Regression

In [61]:
# Logistic regression with scikit-learn's default settings.
lr_classifier = LogisticRegression()

In [62]:
# Train logistic regression on the training split, then score it on the held-out split.
y_pred = lr_classifier.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)
    

 Accuracy = 0.8824  Precision = 0.9065   Recall = 0.8780  F1 score = 0.8920 


## K Nearest Neighbor

In [63]:
# k-nearest-neighbours classifier; 5 neighbours is the library default, spelled out here.
knn_classifier = KNeighborsClassifier(n_neighbors=5)

In [64]:
# Train k-NN on the training split, then score it on the held-out split.
y_pred = knn_classifier.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)
    

 Accuracy = 0.8562  Precision = 0.8950   Recall = 0.8386  F1 score = 0.8659 


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


## Naive Bayes

In [65]:
# Gaussian naive Bayes with default settings.
nb_classifier = GaussianNB()

In [66]:
# Train naive Bayes on the training split, then score it on the held-out split.
y_pred = nb_classifier.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)
    

 Accuracy = 0.8627  Precision = 0.9030   Recall = 0.8425  F1 score = 0.8717 


## Voting Classifier

In [67]:
# Majority-vote ensemble over fresh, unfitted copies of the five model types
# evaluated above. With voting='hard' each estimator casts one class vote and
# the most frequent class wins.
# The decision tree is seeded so the ensemble's fitted state and metrics are
# reproducible across runs; the value 0 matches the train/test split seed.
voting_model = VotingClassifier(
    estimators=[
        ('dtc', DecisionTreeClassifier(random_state=0)),
        ('knn', KNeighborsClassifier()),
        ('svc', svm.SVC(kernel='linear')),
        ('lrc', LogisticRegression()),
        ('nbc', GaussianNB()),
    ],
    voting='hard',
)

In [68]:
# Train the voting ensemble on the training split, then score it on the held-out split.
y_pred = voting_model.fit(X_train, Y_train).predict(X_test)

accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
precision = precision_score(y_true=Y_test, y_pred=y_pred)
recall = recall_score(y_true=Y_test, y_pred=y_pred)
f1 = f1_score(y_true=Y_test, y_pred=y_pred)

print(
    " Accuracy = %0.4f" % accuracy,
    " Precision = %0.4f " % precision,
    " Recall = %0.4f" % recall,
    " F1 score = %0.4f " % f1,
)


 Accuracy = 0.8845  Precision = 0.9102   Recall = 0.8780  F1 score = 0.8938 


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


| Testing interval | Accuracy | Precision | Recall | F1 score |
|-----------------------|----------|-----------|--------|----------|
| 1-153 | 0.9333 | 0.9259 | 0.8929 | 0.9091 |
| 1-306 | 0.9495 | 0.9000 | 0.9730 | 0.9351 |
| 1-459 | 0.9244 | 0.9015 | 0.9675 | 0.9333 |
| 1-612 | 0.9167 | 0.9140 | 0.9497 | 0.9315 |
| 1-765 | 0.8750 | 0.8944 | 0.8881 | 0.8912 |
| 1-918 | 0.8911 | 0.9250 | 0.8740 | 0.8988 |

# Input Test Cases

In [69]:
import pickle

In [70]:
# Serialize the fitted voting ensemble to disk.
# The `with` block guarantees the file is flushed and closed even if
# pickling fails part-way through.
# NOTE(review): only the classifier is persisted. The fitted scaler `S`, which
# the prediction cell applies to new input, is not saved, so a fresh session
# cannot reproduce that preprocessing from this file alone.
filename = 'trained_model1.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(voting_model, model_file)

In [71]:
# Restore the ensemble from disk; the `with` block closes the file handle
# once loading finishes.
# SECURITY: pickle.load can execute arbitrary code, so only load files that
# this notebook (or another trusted source) produced.
with open('trained_model1.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [76]:
# One hand-entered patient record with 20 feature values.
# Named `input_data` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
# The grouping comments assume the values follow the column order of X
# (numeric columns first, then the indicator columns) — TODO confirm.
# NOTE(review): the last three values (presumably Down, Flat, Up) are all 0,
# i.e. no ST_Slope category is selected — confirm this is intended.
input_data = (
    49, 124, 201, 0, 164, 0.0,  # Age, RestingBP, Cholesterol, FastingBS, MaxHR, Oldpeak
    0, 1,                       # F, M
    0, 0, 1, 0,                 # ASY, ATA, NAP, TA
    0, 0, 1,                    # LVH, Normal, ST
    0, 1,                       # N, Y
    0, 0, 0,                    # Down, Flat, Up
)

input_nparray = np.asarray(input_data)

# The estimators expect a 2-D array: one row (this sample) by n_features columns.
input_nparray_reshaped = input_nparray.reshape(1, -1)

# Apply the scaling fitted earlier in the notebook (the global scaler `S`).
standard_input = S.transform(input_nparray_reshaped)

prediction = loaded_model.predict(standard_input)

# Label 0 is reported as "no heart disease"; any other label as "has heart disease".
if prediction[0] == 0:
    print('The person is not having Heart Disease')
else:
    print('The person has Heart Disease')

The person has Heart Disease


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
