<a href="https://colab.research.google.com/github/5h33p/heart_failure_clinical_records/blob/main/deeplearning_classification_heart_failure_clinical_records_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Dependencies

In [150]:
!pip install kagglehub



In [151]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np
import kagglehub

# ETL

In [152]:
path = kagglehub.dataset_download("andrewmvd/heart-failure-clinical-data")

# print("Path to dataset files:", path)

datset = pd.read_csv(path + '/heart_failure_clinical_records_dataset.csv')
datset.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [153]:
new_col_name = []

for col in dataset.columns:
  new_col_name.append(col.lower().strip().replace(' ', '_'))
dataset.columns = new_col_name
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    float64
 2   creatinine_phosphokinase  299 non-null    float64
 3   diabetes                  299 non-null    float64
 4   ejection_fraction         299 non-null    float64
 5   high_blood_pressure       299 non-null    float64
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    float64
 9   sex                       299 non-null    float64
 10  smoking                   299 non-null    float64
 11  time                      299 non-null    float64
 12  death_event               299 non-null    float64
dtypes: float64(13)
memory usage: 30.5 KB


In [154]:
print(Counter(dataset.death_event))

Counter({0.0: 203, 1.0: 96})


In [155]:
y = dataset.death_event
X = dataset.drop(columns='death_event')

# Converting boolean variables
bool_var = X.columns[X.nunique()==2]
for col in bool_var:
  X[col] = X[col].astype(bool)

# Separating into train/test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=88)

# Getting numerical data to transform
num_var = X_train.select_dtypes(include=['int64', 'float64']).columns
ct = ColumnTransformer([('scaler', StandardScaler(), num_var)], remainder='passthrough')
X_train_scaled = pd.DataFrame(ct.fit_transform(X_train), columns=ct.get_feature_names_out())
X_test_scaled = pd.DataFrame(ct.transform(X_test), columns=ct.get_feature_names_out())
# X_train_scaled.head()

le = LabelEncoder()
y_train = le.fit_transform(y_train.astype(str))
y_test = le.transform(y_test.astype(str))

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Criating Model

In [156]:
model = Sequential()
model.add(InputLayer(shape=(X_train_scaled.shape[1],)))
model.add(Dense(12, activation='relu'))
model.add(Dense(len(y_test[0]), activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# Training Model

In [None]:
history = model.fit(X_train_scaled, y_train, validation_batch_size=0.2, batch_size=16, epochs=100, verbose=1)

Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3639 - loss: 0.9612   
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3948 - loss: 0.8653 
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4322 - loss: 0.8130 
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5195 - loss: 0.7329 
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6545 - loss: 0.6387 
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7135 - loss: 0.6066 
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7451 - loss: 0.5613 
Epoch 8/100
[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 34ms/step - accuracy: 0.7500 - loss: 0.5844

# Evaluating Model

In [None]:
loss, acc = model.evaluate(X_test_scaled, y_test, verbose=0)
print(loss, acc)