In [48]:
# set up
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import InputLayer

# suppress the warnings
import warnings
warnings.filterwarnings("ignore")

In [22]:
# load the dataset
data = pd.read_csv("heart_failure_clinical_records_dataset.csv")
data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [7]:
# inspect the dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB


In [10]:
# inspect the distribution of the outcome
print("The distribution of heart failure records in the dataset:")
print(Counter(data['DEATH_EVENT']))

The distribution of heart failure records in the dataset:
Counter({0: 203, 1: 96})


In [16]:
# split teh dataset into features and labels
labels = data['DEATH_EVENT']
features = data.drop('DEATH_EVENT', axis=1)

# split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# scale the numeric features using ColumnTransformer
numeric_cols = ['age','creatinine_phosphokinase','ejection_fraction','platelets','serum_creatinine','serum_sodium','time']
ct = ColumnTransformer(transformers=[
    ("scaler", StandardScaler(), numeric_cols)
])
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

# convert the labels into categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [46]:
# design the model
model = Sequential()
model.add(InputLayer(shape=(X_train.shape[1],)))
model.add(Dense(12, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation="softmax"))

# compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# fit the model
history = model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=2)

Epoch 1/100
15/15 - 1s - 36ms/step - accuracy: 0.6192 - loss: 0.7981
Epoch 2/100
15/15 - 0s - 2ms/step - accuracy: 0.6736 - loss: 0.7730
Epoch 3/100
15/15 - 0s - 2ms/step - accuracy: 0.6736 - loss: 0.7116
Epoch 4/100
15/15 - 0s - 2ms/step - accuracy: 0.7238 - loss: 0.6677
Epoch 5/100
15/15 - 0s - 2ms/step - accuracy: 0.7071 - loss: 0.6732
Epoch 6/100
15/15 - 0s - 2ms/step - accuracy: 0.7406 - loss: 0.6324
Epoch 7/100
15/15 - 0s - 2ms/step - accuracy: 0.6946 - loss: 0.6046
Epoch 8/100
15/15 - 0s - 2ms/step - accuracy: 0.7322 - loss: 0.6019
Epoch 9/100
15/15 - 0s - 2ms/step - accuracy: 0.7280 - loss: 0.6154
Epoch 10/100
15/15 - 0s - 2ms/step - accuracy: 0.7531 - loss: 0.5765
Epoch 11/100
15/15 - 0s - 2ms/step - accuracy: 0.7448 - loss: 0.5624
Epoch 12/100
15/15 - 0s - 2ms/step - accuracy: 0.7531 - loss: 0.5336
Epoch 13/100
15/15 - 0s - 2ms/step - accuracy: 0.7866 - loss: 0.4946
Epoch 14/100
15/15 - 0s - 2ms/step - accuracy: 0.7824 - loss: 0.5136
Epoch 15/100
15/15 - 0s - 2ms/step - accur

In [47]:
# evaluate the model
loss, acc = model.evaluate(X_test, y_test)
print("Loss: ", loss)
print("Accuracy: ", acc)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7715 - loss: 0.5582 
Loss:  0.5662959218025208
Accuracy:  0.7666666507720947


In [49]:
y_estimate = model.predict(X_test)
y_estimate = np.argmax(y_estimate, axis=1)
y_true = np.argmax(y_test, axis=1)
print(classification_report(y_true, y_estimate))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
              precision    recall  f1-score   support

           0       0.72      0.97      0.83        35
           1       0.92      0.48      0.63        25

    accuracy                           0.77        60
   macro avg       0.82      0.73      0.73        60
weighted avg       0.81      0.77      0.75        60

