# IMPORT LIBRARIES AND DATA

---



> Dataset Source : https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction

> Using only sklearn

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
#upload the heart.csv file beforehand
heart=pd.read_csv('/content/heart.csv')

# EXPLORATION - DATA UNDERSTANDING

---



In [3]:
heart

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [4]:
print(len(heart['RestingBP'].unique()))
print(len(heart['Cholesterol'].unique()))
print(len(heart['MaxHR'].unique()))

67
222
119


# CLEANING AND PREPROCESSING

---



> Encoding Categorical Attributes



In [5]:
from sklearn import preprocessing
LE=preprocessing.LabelEncoder()
heart['Sex']=LE.fit_transform(heart['Sex'])
heart['ChestPainType']=LE.fit_transform(heart['ChestPainType'])
heart['RestingECG']=LE.fit_transform(heart['RestingECG'])
heart['ExerciseAngina']=LE.fit_transform(heart['ExerciseAngina'])
heart['ST_Slope']=LE.fit_transform(heart['ST_Slope'])

> Feature Selection

In [6]:
heart_x=heart.drop(['HeartDisease'],axis=1) # input features
heart_y=heart['HeartDisease'] # output features

> Split Train and Test

In [7]:
#train-test split
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(heart_x,heart_y,test_size=0.3,random_state=5)

# LOGISTIC REGRESSION

---



> Fitting Model

In [8]:
from sklearn import linear_model
log_reg=linear_model.LogisticRegression(random_state=5,max_iter=700)
log_reg.fit(x_train,y_train)

LogisticRegression(max_iter=700, random_state=5)

> Confusion Matrix and Evaluation

In [9]:
from sklearn.metrics import confusion_matrix
predictions=log_reg.predict(x_test)
cm = confusion_matrix(y_test,predictions )

TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()

print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)

accuracy =  (TP+TN) /(TP+FP+TN+FN)

print('Accuracy of the binary classification = {:0.3f}'.format(accuracy))

True Positive(TP)  =  143
False Positive(FP) =  18
True Negative(TN)  =  101
False Negative(FN) =  14
Accuracy of the binary classification = 0.884


# KNN

---



> Fitting Model

In [10]:
from sklearn import neighbors
knn=neighbors.KNeighborsClassifier()
knn.fit(x_train,y_train)

KNeighborsClassifier()

> Confusion Matrix

In [11]:
from sklearn.metrics import confusion_matrix
predictions=knn.predict(x_test)
cm = confusion_matrix(y_test,predictions )

TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()

print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)

accuracy =  (TP+TN) /(TP+FP+TN+FN)

print('Accuracy of the binary classification = {:0.3f}'.format(accuracy))

True Positive(TP)  =  112
False Positive(FP) =  40
True Negative(TN)  =  79
False Negative(FN) =  45
Accuracy of the binary classification = 0.692


# SVM

---



> Fitting Model

In [12]:
from sklearn import svm
SVM=svm.SVC(random_state=5)
SVM.fit(x_train,y_train)

SVC(random_state=5)

 > Confusion Matrix

In [13]:
from sklearn.metrics import confusion_matrix
predictions=SVM.predict(x_test)
cm = confusion_matrix(y_test,predictions )

TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()

print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)

accuracy =  (TP+TN) /(TP+FP+TN+FN)

print('Accuracy of the binary classification = {:0.3f}'.format(accuracy))

True Positive(TP)  =  115
False Positive(FP) =  34
True Negative(TN)  =  85
False Negative(FN) =  42
Accuracy of the binary classification = 0.725


# Decision Tree

---



> Fitting Model

In [14]:
from sklearn import tree
DT=tree.DecisionTreeClassifier(random_state=5)
DT.fit(x_train,y_train)

DecisionTreeClassifier(random_state=5)

> Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix
predictions=DT.predict(x_test)
cm = confusion_matrix(y_test,predictions )

TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()

print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)

accuracy =  (TP+TN) /(TP+FP+TN+FN)

print('Accuracy of the binary classification = {:0.3f}'.format(accuracy))

True Positive(TP)  =  120
False Positive(FP) =  27
True Negative(TN)  =  92
False Negative(FN) =  37
Accuracy of the binary classification = 0.768


# Naive Bayes

---



> Fitting Model

In [16]:
from sklearn.naive_bayes import GaussianNB
gnb=GaussianNB()
gnb.fit(x_train,y_train)

GaussianNB()

> Confusion Matrix

In [17]:
from sklearn.metrics import confusion_matrix
predictions=gnb.predict(x_test)
cm = confusion_matrix(y_test,predictions )

TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()

print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)

accuracy =  (TP+TN) /(TP+FP+TN+FN)

print('Accuracy of the binary classification = {:0.3f}'.format(accuracy))

True Positive(TP)  =  143
False Positive(FP) =  16
True Negative(TN)  =  103
False Negative(FN) =  14
Accuracy of the binary classification = 0.891
