In [53]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

In [2]:
# Read data without the index column
data = pd.read_csv("heart_failure.csv", index_col=[0])
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 299 entries, 0 to 298
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    object 
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    object 
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    object 
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    object 
 10  smoking                   299 non-null    object 
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
 13  death_event               299 non-null    object 
dtypes: float64(3), 

In [3]:
# Data statistics
data.describe(include="all")

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT,death_event
count,299.0,299,299.0,299,299.0,299,299.0,299.0,299.0,299,299,299.0,299.0,299
unique,,2,,2,,2,,,,2,2,,,2
top,,no,,no,,no,,,,yes,no,,,no
freq,,170,,174,,194,,,,194,203,,,203
mean,60.833893,,581.839465,,38.083612,,263358.029264,1.39388,136.625418,,,130.26087,0.32107,
std,11.894809,,970.287881,,11.834841,,97804.236869,1.03451,4.412477,,,77.614208,0.46767,
min,40.0,,23.0,,14.0,,25100.0,0.5,113.0,,,4.0,0.0,
25%,51.0,,116.5,,30.0,,212500.0,0.9,134.0,,,73.0,0.0,
50%,60.0,,250.0,,38.0,,262000.0,1.1,137.0,,,115.0,0.0,
75%,70.0,,582.0,,45.0,,303500.0,1.4,140.0,,,203.0,1.0,


In [4]:
# Check distribution of the death_event
Counter(data["death_event"])

Counter({'no': 203, 'yes': 96})

In [18]:
# Create the labels and features
y = data["death_event"]
x = data.drop(columns=["DEATH_EVENT", "death_event"])

# Encode categorical features
x = pd.get_dummies(x)

# Split data into train and test sets

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=15)

# Extract the columns of the numerical features
numerical_columns = x_train.select_dtypes(include = ["float64", "int"]).columns

# Standardize the numerical features
ct = ColumnTransformer([("numerical", StandardScaler(), numerical_columns)], remainder="passthrough")
x_train = ct.fit_transform(x_train)
x_test = ct.transform(x_test)

# Encode the labels
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


In [45]:
# Design the neural network model
model = Sequential()

# Add the input layer
model.add(InputLayer(shape = (x.shape[1],)))
model.add(Dense(12, activation = "relu"))
model.add(Dense(2, activation = "softmax"))



In [46]:
# Compile the model 
model.compile(loss = "categorical_crossentropy", metrics = ["accuracy"], optimizer=Adam())

In [48]:
# Train the model
history = model.fit(x_train, y_train, epochs = 100, batch_size = 16, verbose = 1)

Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 643us/step - accuracy: 0.8580 - loss: 0.3020
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 643us/step - accuracy: 0.9053 - loss: 0.2581
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500us/step - accuracy: 0.9100 - loss: 0.2247
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step - accuracy: 0.8795 - loss: 0.2785
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571us/step - accuracy: 0.8867 - loss: 0.2463
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.8796 - loss: 0.2403
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 642us/step - accuracy: 0.9006 - loss: 0.2495
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.8640 - loss: 0.2653
Epoch 9/100
[1m15/15[0m [32m━

In [73]:
# Evaluate the model 
loss, acc = model.evaluate(x_test, y_test)
print("Loss: ", loss, "Accuracy:", acc)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 989us/step - accuracy: 0.7931 - loss: 0.5429
Loss:  0.5442015528678894 Accuracy: 0.7833333611488342


In [74]:
# Predict the outcome 

y_pred = model.predict(x_test, verbose=0)

# The selected code snippet, y_true = np.argmax(y_test, axis=1), is used to convert the one-hot encoded labels in the test set back to their original form.
# In the context of the provided code, y_test is a 2D array where each row represents a sample and each column corresponds to a class label. The values in the array are either 0 or 1, indicating whether the sample belongs to that class or not.
# The np.argmax function is used to find the index of the maximum value along the specified axis (in this case, axis=1). This operation returns an array where each element represents the class label with the highest probability for the corresponding sample in the y_test array.
# By assigning the result of np.argmax(y_test, axis=1) to y_true, we effectively convert the one-hot encoded labels back to their original form, which is necessary for evaluating the performance of the model using metrics such as accuracy or classification report.

y_true = np.argmax(y_test, axis=1)

In [78]:
# Classification report
print(classification_report(y_true, np.argmax(y_pred, axis = 1)))

              precision    recall  f1-score   support

           0       0.86      0.84      0.85        43
           1       0.61      0.65      0.63        17

    accuracy                           0.78        60
   macro avg       0.73      0.74      0.74        60
weighted avg       0.79      0.78      0.79        60



In [79]:
np.argmax(y_pred, axis = 1)

array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0], dtype=int64)

In [85]:
np.argmax(y_test, axis = 1)

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], dtype=int64)