In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SubsetRandomSampler, SequentialSampler

In [2]:
# Read the heart-failure clinical records CSV (expected in the working
# directory) and show the first rows as a quick sanity check.
csv_path = 'heart_failure_clinical_records_dataset.csv'
dataframe = pd.read_csv(csv_path)
print(dataframe.head())

    age  anaemia  creatinine_phosphokinase  ...  smoking  time  DEATH_EVENT
0  75.0        0                       582  ...        0     4            1
1  55.0        0                      7861  ...        0     6            1
2  65.0        0                       146  ...        1     7            1
3  50.0        1                       111  ...        0     7            1
4  65.0        1                       160  ...        0     8            1

[5 rows x 13 columns]


In [3]:
def normalize_data(dataframe, name):
    """Return the z-score standardized version of one DataFrame column.

    The column is centred on its mean and scaled by its population standard
    deviation (``np.std`` defaults to ``ddof=0``). The input frame is not
    modified; the caller decides where to store the result.

    Args:
        dataframe: pandas DataFrame holding the column.
        name: label of the column to standardize.

    Returns:
        A pandas Series with the same index as ``dataframe[name]``. For a
        column with zero variance the centred values (all zeros) are returned
        instead of dividing by zero, which would fill the column with NaN.
    """
    data = dataframe[name]
    mean = np.mean(data)
    std = np.std(data)
    centered = data - mean
    if std == 0:
        # Constant column: there is no spread to scale by, so 0/0 would turn
        # every entry into NaN and poison any model trained on it.
        return centered
    return centered / std

In [4]:
# Standardize each continuous feature, overwriting the column in place.
# Columns not listed here are left untouched.
# NOTE(review): the mean/std are computed over the full dataset, i.e. before
# the train/validation split further down -- confirm this is intended.
continuous_cols = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
for col in continuous_cols:
    dataframe[col] = normalize_data(dataframe, col)

In [5]:
# Preview the first five rows again to confirm the standardized columns.
print(dataframe.head(n=5))

        age  anaemia  creatinine_phosphokinase  ...  smoking      time  DEATH_EVENT
0  1.192945        0                  0.000166  ...        0 -1.629502            1
1 -0.491279        0                  7.514640  ...        0 -1.603691            1
2  0.350833        0                 -0.449939  ...        1 -1.590785            1
3 -0.912335        1                 -0.486071  ...        0 -1.590785            1
4  0.350833        1                 -0.435486  ...        0 -1.577879            1

[5 rows x 13 columns]


In [6]:
# Feature columns fed to the model: every column except the DEATH_EVENT target.
input_cols = [
    'age',
    'anaemia',
    'creatinine_phosphokinase',
    'diabetes',
    'ejection_fraction',
    'high_blood_pressure',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'sex',
    'smoking',
    'time',
]
# DataFrame -> float NumPy array -> float64 tensor of shape (n_samples, 12).
inputs = dataframe[input_cols].to_numpy(dtype=float)
pt_inputs = torch.tensor(inputs, dtype=torch.float64)
print(pt_inputs.shape)

torch.Size([299, 12])


In [7]:
# Target column as a float array, then as a float64 column vector so its
# shape (n_samples, 1) lines up with the model's single output unit.
labels = dataframe['DEATH_EVENT'].to_numpy(float)
pt_labels = torch.tensor(labels, dtype=torch.float64).unsqueeze(-1)
print(pt_labels.shape)

torch.Size([299, 1])


In [8]:
# Pair every feature row with its label, then hold out 20% for validation.
dataset = TensorDataset(pt_inputs, pt_labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size  # remainder, so the sizes always sum to len(dataset)

# Seed the split with a dedicated generator so the same samples land in the
# train/validation sets on every Restart & Run All; without it the reported
# validation metrics are not comparable between runs.
split_seed = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

  239 training samples
   60 validation samples


In [9]:
batch_size = 64

# Training batches are drawn in random order; shuffle=True makes the
# DataLoader build a RandomSampler over the dataset internally.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation order is irrelevant, so the samples are simply read in sequence
# (shuffle=False yields a SequentialSampler).
validation_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

n_train_batches = len(train_dataloader)
n_val_batches = len(validation_dataloader)
print('{:,} training batches & {:,} validation batches'.format(n_train_batches, n_val_batches))

4 training batches & 1 validation batches


In [10]:
# Small fully connected binary classifier: 12 features -> 16 -> 8 -> 1.
# The final layer has no activation, so the network outputs a single raw score.
model = nn.Sequential(
    nn.Linear(12, 16),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(16, 8),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(8, 1),
)
print(model)

Sequential(
  (0): Linear(in_features=12, out_features=16, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=16, out_features=8, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.3, inplace=False)
  (6): Linear(in_features=8, out_features=1, bias=True)
)


In [11]:
# Plain SGD over all model parameters with a fixed learning rate.
lr = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
# BCEWithLogitsLoss applies the sigmoid itself, so it is fed the model's raw
# outputs rather than probabilities.
criterion = nn.BCEWithLogitsLoss()

In [12]:
# Sanity check: print the sum of the labels in each validation batch, i.e.
# how many samples carry label 1.
for batch in validation_dataloader:
    labels = batch[1]
    print(labels.squeeze().sum())

tensor(21., dtype=torch.float64)


In [13]:
from sklearn.metrics import f1_score

num_epochs = 1000
for epoch in range(num_epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    for step, batch in enumerate(train_dataloader):
        # The dataset stores float64 tensors while the model parameters are
        # float32, so cast both features and targets to float32.
        inputs = batch[0].float()
        labels = batch[1].float()

        optimizer.zero_grad()

        predictions = model(inputs)  # raw logits, shape (batch, 1)
        loss = criterion(predictions, labels)  # scalar: mean BCE over the batch
        loss.backward()
        optimizer.step()

    # ---- Validation pass: dropout off, no gradient tracking ----
    model.eval()
    val_logits, val_labels = [], []
    with torch.no_grad():
        for batch in validation_dataloader:
            inputs = batch[0].float()
            val_logits.append(model(inputs))
            val_labels.append(batch[1].float())

    if val_logits:  # an empty validation loader simply reports nothing
        # Score the whole validation set at once so the loss and F1 printed
        # for an epoch describe every validation sample, not a single batch.
        predictions = torch.cat(val_logits)
        labels = torch.cat(val_labels)
        loss = criterion(predictions, labels)

        # The model emits logits, so convert to probabilities before applying
        # the 0.5 decision threshold. squeeze(1) drops only the output
        # dimension and therefore also works for a single-sample set.
        ps = torch.sigmoid(predictions).squeeze(1)
        preds = (ps > 0.5).long()
        labels = labels.squeeze(1)
        # scikit-learn's signature is f1_score(y_true, y_pred).
        f1 = f1_score(labels.long().tolist(), preds.tolist())
        print(f'epoch {epoch + 1}, loss {loss.item():f}, f1 score {f1}')
    model.train()  # re-enable dropout for the next epoch

epoch 1, loss 0.664033, f1 score 0.0
epoch 2, loss 0.663244, f1 score 0.0
epoch 3, loss 0.662364, f1 score 0.0
epoch 4, loss 0.661536, f1 score 0.0
epoch 5, loss 0.660746, f1 score 0.0
epoch 6, loss 0.659915, f1 score 0.0
epoch 7, loss 0.659145, f1 score 0.0
epoch 8, loss 0.658360, f1 score 0.0
epoch 9, loss 0.657560, f1 score 0.0
epoch 10, loss 0.656799, f1 score 0.0
epoch 11, loss 0.656060, f1 score 0.0
epoch 12, loss 0.655364, f1 score 0.0
epoch 13, loss 0.654615, f1 score 0.0
epoch 14, loss 0.653896, f1 score 0.0
epoch 15, loss 0.653205, f1 score 0.0
epoch 16, loss 0.652542, f1 score 0.0
epoch 17, loss 0.651820, f1 score 0.0
epoch 18, loss 0.651157, f1 score 0.0
epoch 19, loss 0.650475, f1 score 0.0
epoch 20, loss 0.649750, f1 score 0.0
epoch 21, loss 0.649115, f1 score 0.0
epoch 22, loss 0.648474, f1 score 0.0
epoch 23, loss 0.647824, f1 score 0.0
epoch 24, loss 0.647213, f1 score 0.0
epoch 25, loss 0.646564, f1 score 0.0
epoch 26, loss 0.645896, f1 score 0.0
epoch 27, loss 0.6452