In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
pd.set_option('display.max_columns', 50)

In [2]:
data = pd.read_csv('heart_num_0228.csv')
Q1 = data[['SleepHours', 'BMI']].quantile(q = 0.25)
Q3 = data[['SleepHours', 'BMI']].quantile(q = 0.75)
IQR = Q3-Q1
data_iqr = data[(data['SleepHours'] <= Q3['SleepHours']+IQR['SleepHours']*1.5)&(data['SleepHours'] >= Q1['SleepHours']-IQR['SleepHours']*1.5)]
data_iqr = data_iqr[(data_iqr['BMI'] <= Q3['BMI']+IQR['BMI']*1.5)&(data_iqr['BMI'] >= Q1['BMI']-IQR['BMI']*1.5)]
print('Original data:', len(data))
print('Remove Outlier data:', len(data_iqr))

Original data: 274034
Remove Outlier data: 261812


In [3]:
X = data_iqr.drop('HadHeartAttack', axis = 1)
y = data_iqr['HadHeartAttack']

In [4]:
from imblearn.over_sampling import SMOTE

In [5]:
smo = SMOTE(random_state = 1234)
X_re, y_re = smo.fit_resample(X, y)
print('Original Data:', len(X))
print('Resampled Data:', len(X_re))

Original Data: 261812
Resampled Data: 494774


In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_re, y_re, test_size = 0.2, random_state = 1234)

In [8]:
from sklearn.preprocessing import RobustScaler

In [9]:
rbs = RobustScaler()
X_trn = rbs.fit_transform(X_train)
X_tst = rbs.transform(X_test)

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.callbacks import EarlyStopping
import tensorflow as tf




In [11]:
print(X_trn.shape)

(395819, 46)


In [12]:
es = EarlyStopping(monitor = 'accuracy')

In [13]:
model = Sequential()
model.add(Dense(50, input_dim = 46, activation = 'relu'))
model.add(Dense(28, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(8, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))




In [14]:
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [15]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy', f1_m, precision_m, recall_m])
history = model.fit(X_trn, y_train, epochs = 100, batch_size = 10, callbacks = [es])
print('\n Accraucy: %.2f' %(model.evaluate(X_trn, y_train)))


Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


TypeError: must be real number, not list

In [16]:
loss, acc, f1, precision, recall = model.evaluate(X_trn, y_train)
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {acc*100:.2f}%')
print(f'F1 Score: {f1:.4f}')

Loss: 0.1216
Accuracy: 95.21%
F1 Score: 0.9493
