In [127]:
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [128]:
# Read the heart-disease dataset from CSV and preview its first five rows.
set_1_path = 'data/heart.csv'
heart_df = pd.read_csv(filepath_or_buffer=set_1_path)
heart_df.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [129]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly).
# NOTE(review): the reported minimum of 0 for RestingBP and Cholesterol looks like
# missing values coded as zero - confirm before relying on these columns.
heart_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [130]:
# Class balance of the binary FastingBS flag.
fasting_bs = heart_df['FastingBS']
fasting_bs.value_counts()

0    704
1    214
Name: FastingBS, dtype: int64

In [131]:
# Frequency of each of the four chest pain categories.
chest_pain = heart_df['ChestPainType']
chest_pain.value_counts()

ASY    496
NAP    203
ATA    173
TA      46
Name: ChestPainType, dtype: int64

In [132]:
# Frequency of each of the three resting ECG results.
resting_ecg = heart_df['RestingECG']
resting_ecg.value_counts()

Normal    552
LVH       188
ST        178
Name: RestingECG, dtype: int64

In [133]:
# Frequency of each of the three ST_Slope values.
st_slope = heart_df['ST_Slope']
st_slope.value_counts()

Flat    460
Up      395
Down     63
Name: ST_Slope, dtype: int64

In [134]:
# One-hot encode the categorical columns (Sex, ChestPainType, RestingECG,
# ExerciseAngina, ST_Slope); the numeric columns pass through unchanged.
heart_df_encoded = pd.get_dummies(data=heart_df)
heart_df_encoded.head(n=5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,...,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,40,140,289,0,172,0.0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0.0,0,0,1,0,...,0,0,0,0,1,1,0,0,0,1
3,48,138,214,0,108,1.5,1,1,0,1,...,0,0,0,1,0,0,1,0,1,0
4,54,150,195,0,122,0.0,0,0,1,0,...,1,0,0,1,0,1,0,0,0,1


In [135]:
# Build the feature matrix X and the target vector y.
# HeartDisease is the label. ExerciseAngina_N and Sex_F are left out of the features;
# each is the exact complement of its two-level counterpart (ExerciseAngina_Y / Sex_M).
target_column = 'HeartDisease'
complement_dummies = ['ExerciseAngina_N', 'Sex_F']
X = heart_df_encoded.drop(columns=[target_column, *complement_dummies])
y = heart_df_encoded[target_column]

In [136]:
# Split features and target into train/test partitions.
# No test_size is given, so scikit-learn's default split proportion applies;
# random_state pins the shuffle so the same rows land in the test set on every run.
split_parts = train_test_split(X, y, random_state=54)
X_train, X_test, y_train, y_test = split_parts

In [137]:
# Load saved model and scaler from files:
scaler_file = 'best_nn_scaler.pkl'
loaded_model = tf.keras.models.load_model('static/best_model.h5')

# Open the scaler file in a context manager so the handle is closed as soon as
# unpickling finishes (the bare open() call previously left it open).
# NOTE: pickle.load can execute arbitrary code - only load scaler files that
# come from a trusted source.
with open(scaler_file, 'rb') as scaler_handle:
    loaded_scaler = pickle.load(scaler_handle)

print(f"Scaler loaded from file: {scaler_file}")

Scaler loaded from file: best_nn_scaler.pkl


In [138]:
# Scale the held-out features with the loaded scaler, then score the loaded model.
# NOTE(review): this is only a genuine held-out score if the saved model was trained
# on the split produced with the same random_state - confirm against the training notebook.
X_test_scaled = loaded_scaler.transform(X_test)
evaluation = loaded_model.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - loss: 0.1739 - accuracy: 0.9609 - 71ms/epoch - 9ms/step
Loss: 0.17386700212955475, Accuracy: 0.960869550704956


In [139]:
# Make some testing data:
Age = 65
Sex = "M"
ChestPainType = "ATA"
RestingBP = 185
Cholesterol = 333
FastingBS = 0
RestingECG = "Normal"
MaxHR = 150
ExerciseAngina = "Y"
Oldpeak = 1.5
ST_Slope = "Flat"

# Levels of each categorical feature, listed in the same order as the one-hot
# columns produced by pd.get_dummies earlier in the notebook.
SEX_LEVELS = ["F", "M"]
CHEST_PAIN_LEVELS = ["ASY", "ATA", "NAP", "TA"]
RESTING_ECG_LEVELS = ["LVH", "Normal", "ST"]
EXERCISE_ANGINA_LEVELS = ["N", "Y"]
ST_SLOPE_LEVELS = ["Down", "Flat", "Up"]


def one_hot(feature, value, levels):
    """Return the 0/1 indicator list for `value` across `levels`.

    Args:
        feature: Name of the feature, used only in the error message.
        value: The category value to encode.
        levels: Ordered list of valid categories; the result follows this order.

    Returns:
        A list of ints the same length as `levels`, with a single 1 at the
        position of `value`.

    Raises:
        ValueError: If `value` is not one of `levels`. Without this check a typo
            would silently be encoded as some other category and still yield a
            confident-looking prediction.
    """
    if value not in levels:
        raise ValueError(f"Unknown {feature} value {value!r}; expected one of {levels}")
    return [int(value == level) for level in levels]


# Column order must match the feature matrix X that the scaler and model are used with.
column_headers = ["Age","RestingBP","Cholesterol","FastingBS","MaxHR","Oldpeak",
                  "Sex_M",
                  "ChestPainType_ASY","ChestPainType_ATA","ChestPainType_NAP","ChestPainType_TA",
                  "RestingECG_LVH","RestingECG_Normal","RestingECG_ST",
                  "ExerciseAngina_Y",
                  "ST_Slope_Down","ST_Slope_Flat","ST_Slope_Up"]

data_row = [
    Age, RestingBP, Cholesterol, FastingBS, MaxHR, Oldpeak,
    # Two-level features keep only the second indicator (Sex_M / ExerciseAngina_Y),
    # because Sex_F and ExerciseAngina_N were dropped from X.
    *one_hot("Sex", Sex, SEX_LEVELS)[1:],
    *one_hot("ChestPainType", ChestPainType, CHEST_PAIN_LEVELS),
    *one_hot("RestingECG", RestingECG, RESTING_ECG_LEVELS),
    *one_hot("ExerciseAngina", ExerciseAngina, EXERCISE_ANGINA_LEVELS)[1:],
    *one_hot("ST_Slope", ST_Slope, ST_SLOPE_LEVELS),
]

# Guard against the row and the header list drifting apart.
assert len(data_row) == len(column_headers), "data_row does not match column_headers"

test_data = pd.DataFrame([data_row], columns=column_headers)

test_data

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,65,185,333,0,150,1.5,1,0,1,0,0,0,1,0,1,0,1,0


In [140]:
# Scale the single-patient row with the loaded scaler before predicting.
test_data_scaled = loaded_scaler.transform(test_data)

# Keep the prediction under its own name: assigning it to `y` would overwrite the
# target Series defined earlier, so re-running the train/test split cell afterwards
# would fail or split on the wrong data.
prediction = loaded_model.predict(test_data_scaled)
print('Your likelihood of having heart disease is ',f'{prediction[0][0]*100}%')


Your likelihood of having heart disease is  95.85505723953247%
