In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

<h1 align = center> Data Loading and Preview </h1>

In [4]:
# Read the heart-failure records from the CSV next to the notebook and
# print the column names, dtypes and non-null counts as a quick schema check.
data = pd.read_csv(filepath_or_buffer='heart_failure.csv')
data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                299 non-null    int64  
 1   age                       299 non-null    float64
 2   anaemia                   299 non-null    object 
 3   creatinine_phosphokinase  299 non-null    int64  
 4   diabetes                  299 non-null    object 
 5   ejection_fraction         299 non-null    int64  
 6   high_blood_pressure       299 non-null    object 
 7   platelets                 299 non-null    float64
 8   serum_creatinine          299 non-null    float64
 9   serum_sodium              299 non-null    int64  
 10  sex                       299 non-null    object 
 11  smoking                   299 non-null    object 
 12  time                      299 non-null    int64  
 13  DEATH_EVENT               299 non-null    int64  
 14  death_even

In [5]:
# Show the first five rows to eyeball the raw values.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT,death_event
0,0,75.0,no,582,no,20,yes,265000.0,1.9,130,yes,no,4,1,yes
1,1,55.0,no,7861,no,38,no,263358.03,1.1,136,yes,no,6,1,yes
2,2,65.0,no,146,no,20,no,162000.0,1.3,129,yes,yes,7,1,yes
3,3,50.0,yes,111,no,20,no,210000.0,1.9,137,yes,no,7,1,yes
4,4,65.0,yes,160,yes,20,no,327000.0,2.7,116,no,no,8,1,yes


In [7]:
# Count how many rows fall in each target class ('yes' / 'no').
print(f"Classes and number of values in the dataset {Counter(data['death_event'])}")

Classes and number of values in the dataset Counter({'no': 203, 'yes': 96})


In [12]:
# Feature matrix: the twelve clinical columns used as model inputs.
x = data.loc[
    :,
    [
        'age',
        'anaemia',
        'creatinine_phosphokinase',
        'diabetes',
        'ejection_fraction',
        'high_blood_pressure',
        'platelets',
        'serum_creatinine',
        'serum_sodium',
        'sex',
        'smoking',
        'time',
    ],
]
# Target: the string-valued outcome column.
y = data.death_event

<h1 align = center> Data preprocessing </h1>

<h4><p align =center> Convert the categorical features in the DataFrame <code>x</code> to one-hot encoded vectors and assign the result back to <code>x</code>. </p></h4>

In [14]:
# One-hot encode the string-valued feature columns; numeric columns pass through unchanged.
x = pd.get_dummies(data=x)

In [17]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=7,
)

### Data Normalization

In [20]:
# Standard-scale only the continuous columns; every other column
# (the one-hot indicator columns) is passed through untouched.
ct = ColumnTransformer(
    transformers=[
        (
            'StandardScale',
            StandardScaler(),
            [
                'age',
                'creatinine_phosphokinase',
                'ejection_fraction',
                'platelets',
                'serum_creatinine',
                'serum_sodium',
                'time',
            ],
        ),
    ],
    remainder='passthrough',
)

In [21]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to the test rows (the right-hand side is evaluated
# left to right, so the fit happens before the test transform).
x_train, x_test = ct.fit_transform(x_train), ct.transform(x_test)

<h1 align = center> Preparing Labels for Classification </h1>

In [24]:
# Map the string class labels to integer codes; the encoder is fitted on the
# training labels and reused as-is for the test labels.
le = LabelEncoder()
y_train, y_test = (
    le.fit_transform(y_train.astype(str)),
    le.transform(y_test.astype(str)),
)

In [25]:
# Expand the integer class codes into one-hot rows, as required by the
# categorical cross-entropy loss used by the model.
y_train, y_test = (
    to_categorical(y_train, dtype='int64'),
    to_categorical(y_test, dtype='int64'),
)

<h1 align = center> Model Design </h1>

In [36]:
# Small feed-forward classifier: one 12-unit ReLU hidden layer followed by a
# 2-unit softmax output that matches the one-hot encoded labels.
model = Sequential()
# InputLayer needs a shape *tuple* (one entry per feature axis). Writing
# `InputLayer(n,)` passes a bare int because the comma belongs to the call,
# so the tuple is spelled out explicitly via the keyword argument.
model.add(InputLayer(input_shape=(x_train.shape[1],)))
model.add(Dense(12, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Categorical cross-entropy pairs with the softmax output / one-hot targets.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# Train on the scaled training split; the returned History object is the
# cell's displayed value.
model.fit(x_train, y_train, verbose=1, epochs=100, batch_size=16)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fe48c72d650>

<h1 align = center> Model Evaluation </h1>

In [41]:
# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the metrics given to compile() (here: accuracy).
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Model Loss: {loss} \nModel Accuracy: {accuracy}')

Model Loss: 0.3380567729473114 
Model Accuracy: 0.8444444537162781


<h1 align = center> Generating classification report </h1>

In [44]:
# Per-class probabilities for every held-out row.
y_estimate = model.predict(x_test)

# Collapse probabilities / one-hot rows back to integer class codes.
y_estimate = np.argmax(y_estimate, axis=1)
y_true = np.argmax(y_test, axis=1)

# Label the report rows with the original class names known to the fitted
# LabelEncoder instead of anonymous 0/1 codes. Passing `labels` explicitly
# keeps the row count aligned with `target_names` even if a class happens to
# be absent from the test predictions.
print(
    classification_report(
        y_true,
        y_estimate,
        labels=np.arange(len(le.classes_)),
        target_names=le.classes_,
    )
)

              precision    recall  f1-score   support

           0       0.87      0.92      0.90        65
           1       0.76      0.64      0.70        25

    accuracy                           0.84        90
   macro avg       0.82      0.78      0.80        90
weighted avg       0.84      0.84      0.84        90

