In [9]:
import os

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchmetrics as metrics

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping


# Preprocess


In [10]:
class ECGDataset(Dataset):
    """Sliding-window dataset built from per-record ECG sample/annotation CSVs.

    For every record id in ``RECORD_IDS`` the files ``samples_{id}.csv`` and
    ``annotation_{id}.csv`` are read from ``root_dir``. All records are stacked
    and cut into rows of ``SEGMENT_LENGTH`` values; the first
    ``TRAIN_SEGMENTS`` rows form the ``'train'`` split, the rest ``'test'``.

    One item is one segment expanded into every window position. Each feature
    row holds ``window`` consecutive sample values followed by the
    ``window - 1`` annotation values at the same leading positions; the label
    is the annotation value at the last position of the window.

    Args:
        root_dir: Directory containing the CSV files.
        window: Number of consecutive samples per feature row
            (1 <= window <= SEGMENT_LENGTH).
        split: ``'train'`` or ``'test'``.
        normalize: If true, samples are standardised as ``(x - mean) / std``
            and the annotation values used as *features* are standardised with
            the annotation mean/std. Labels are never normalised.

    Raises:
        AssertionError: If ``split`` is not ``'train'`` or ``'test'``.
        ValueError: If ``window`` is outside ``[1, SEGMENT_LENGTH]``.
    """

    # Record numbers whose CSV files are loaded (record 15 is skipped).
    RECORD_IDS = tuple(x for x in range(1, 74) if x != 15)
    # Number of consecutive values per segment (one dataset item).
    SEGMENT_LENGTH = 500
    # Number of leading segments assigned to the training split.
    TRAIN_SEGMENTS = 1300

    def __init__(self, root_dir: str, window: int, split: str, normalize: bool):
        assert split in ['train', 'test'], f'invalid split: {split}'
        if not 1 <= window <= self.SEGMENT_LENGTH:
            raise ValueError(
                f'window must be in [1, {self.SEGMENT_LENGTH}], got {window}')
        self.root_dir = root_dir
        self.window = window
        self.normalize = normalize

        self.sample_files = [os.path.join(root_dir, f'samples_{x}.csv')
                             for x in self.RECORD_IDS]
        self.annotation_files = [os.path.join(root_dir, f'annotation_{x}.csv')
                                 for x in self.RECORD_IDS]

        # Sample files: the first two rows are skipped and only the second
        # column is kept.
        samples = np.vstack([pd.read_csv(f, names=['ECG'], skiprows=2, usecols=[1]).to_numpy()
                             for f in self.sample_files])
        if normalize:
            # NOTE(review): statistics are computed over all records, i.e.
            # before the train/test split below - confirm this is intended.
            self.sample_std = np.std(samples)
            self.sample_mean = np.mean(samples)
            # Parenthesised on purpose: `x - mean / std` would only subtract
            # a constant and leave the scale untouched.
            samples = (samples - self.sample_mean) / self.sample_std
        samples = samples.reshape(-1, self.SEGMENT_LENGTH)

        annotation = np.vstack([pd.read_csv(f).to_numpy()
                                for f in self.annotation_files])
        if normalize:
            # Only the statistics are stored here; they are applied to the
            # annotation *features* in __getitem__ so labels stay untouched.
            self.annotation_std = np.std(annotation)
            self.annotation_mean = np.mean(annotation)
        annotation = annotation.reshape(-1, self.SEGMENT_LENGTH)

        if split == 'train':
            self.samples = samples[:self.TRAIN_SEGMENTS]
            self.annotation = annotation[:self.TRAIN_SEGMENTS]
        else:
            self.samples = samples[self.TRAIN_SEGMENTS:]
            self.annotation = annotation[self.TRAIN_SEGMENTS:]

    def __len__(self) -> int:
        """Return the number of segments in this split."""
        return self.samples.shape[0]

    def __getitem__(self, idx: int):
        """Return ``(features, labels)`` for segment ``idx``.

        Returns:
            features: float32 tensor of shape
                ``(segment_length - window + 1, 2 * window - 1)``.
            labels: int64 tensor of shape ``(segment_length - window + 1,)``.
        """
        segment = self.samples[idx]
        annotation = self.annotation[idx]
        num_windows = segment.shape[0] - self.window + 1

        # Row i of each index matrix selects positions i, i+1, ... so all
        # windows are gathered in one indexing operation instead of a loop.
        starts = np.arange(num_windows)[:, None]
        sample_windows = segment[starts + np.arange(self.window)]
        history = annotation[starts + np.arange(self.window - 1)]
        if self.normalize:
            history = (history - self.annotation_mean) / self.annotation_std

        features = np.hstack([sample_windows, history]).astype(np.float32)
        # Label of window i is the annotation at its last position.
        labels = annotation[self.window - 1:].astype(np.int64)
        return torch.from_numpy(features), torch.from_numpy(labels)


# LSTM Model


In [11]:
class ECGClassifierModel(pl.LightningModule):
    """Bidirectional LSTM that emits one set of class logits per time step.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes of the linear head.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int, num_classes: int):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes

        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # The LSTM is bidirectional, so its output feature size is 2 * hidden_size.
        self.fc1 = nn.Linear(hidden_size * 2, self.num_classes)

        self.loss = nn.CrossEntropyLoss()
        # NOTE(review): newer torchmetrics releases may require an explicit
        # `task=` argument here - confirm against the installed version.
        self.accuracy = metrics.Accuracy(num_classes=self.num_classes)

    def configure_optimizers(self):
        """Return the Adam optimizer (lr=1e-3) over all model parameters."""
        return optim.Adam(self.parameters(), lr=1e-3)

    def _shared_step(self, batch):
        """Compute (loss, accuracy) for one batch with time steps flattened."""
        x, y = batch
        # Flatten to (N, num_classes) / (N,). Use `self.num_classes` rather
        # than a literal so the model works for any class count.
        logits = self.forward(x).reshape(-1, self.num_classes)
        targets = y.reshape(-1)
        return self.loss(logits, targets), self.accuracy(logits, targets)

    def training_step(self, train_batch, batch_idx):
        """Log `train_accuracy` / `train_loss` and return the loss."""
        loss, accuracy = self._shared_step(train_batch)
        self.log("train_accuracy", accuracy)
        self.log('train_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Log `test_accuracy` / `test_loss` for one test batch."""
        test_loss, test_accuracy = self._shared_step(batch)
        self.log("test_accuracy", test_accuracy)
        self.log("test_loss", test_loss)

    def forward(self, x):
        """Run the LSTM and the linear head on every time step of `x`.

        The final hidden/cell states returned by the LSTM are discarded.
        """
        out, _ = self.lstm(x)
        return self.fc1(out)


In [12]:
# Datasets and loaders for a quick interactive check: window of 5, no normalisation.
train_ds = ECGDataset(
    root_dir="./dataset/",
    window=5,
    split='train',
    normalize=False,
)
# Batches are served in dataset order (shuffle is off).
train_dl = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=False,
    generator=torch.random.manual_seed(1),
)

test_ds = ECGDataset(
    root_dir="./dataset/",
    window=5,
    split='test',
    normalize=False,
)
test_dl = DataLoader(test_ds, batch_size=8)


In [13]:
# Smoke test: push the first training batch through an untrained model and
# print its loss and accuracy.
model = ECGClassifierModel(
            input_size=9, hidden_size=16, num_layers=1, num_classes=4)
# next(iter(...)) fails loudly on an empty loader instead of silently leaving
# `out` / `y` undefined (or stale) for the following cell.
batch = next(iter(train_dl))
x, y = batch
out = model(x)
# Flatten the time steps; take the class count from the model rather than
# repeating the literal.
print(model.loss(out.view(-1, model.num_classes), y.view(-1)))
print(model.accuracy(out.view(-1, model.num_classes), y.view(-1)))


tensor(1.4161, grad_fn=<NllLossBackward0>)
tensor(0.0801)


In [14]:
# Shapes of the logits and targets from the batch above, then the raw targets.
print(out.shape, y.shape, sep="\n")
y

torch.Size([8, 496, 4])
torch.Size([8, 496])


tensor([[0, 0, 0,  ..., 2, 2, 2],
        [2, 2, 2,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 2],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])

In [15]:
def create_and_fit(window: list, batch_size: int, normalize: bool, hidden_size: int,
                   root_dir: str = "./dataset/", max_epochs: int = 20):
    """Train and test one ECGClassifierModel per window size.

    For every entry of `window` this builds the train/test datasets and
    loaders, fits a fresh single-layer model with checkpointing and early
    stopping (both monitoring `train_loss`), then runs the test loop.

    Args:
        window: Window sizes to try, one training run each.
        batch_size: Batch size for both loaders.
        normalize: Forwarded to ECGDataset.
        hidden_size: Hidden size of each LSTM direction.
        root_dir: Directory holding the dataset CSV files.
        max_epochs: Upper bound on training epochs per run.
    """
    # os.cpu_count() may return None; fall back to loading in the main process.
    num_workers = os.cpu_count() or 0
    # Tag checkpoints with the preprocessing actually used, so that
    # un-normalised runs are not labelled "normalized".
    data_tag = "normalized" if normalize else "raw"

    for w in window:
        train_ds = ECGDataset(root_dir=root_dir,
                              window=w, split='train', normalize=normalize)
        # NOTE(review): shuffle is off although a seeded generator is passed;
        # confirm whether training batches were meant to be shuffled.
        train_dl = DataLoader(train_ds, batch_size=batch_size,
                              shuffle=False, generator=torch.random.manual_seed(1),
                              num_workers=num_workers)

        test_ds = ECGDataset(root_dir=root_dir, window=w,
                             split='test', normalize=normalize)
        test_dl = DataLoader(test_ds, batch_size=batch_size,
                             num_workers=num_workers)

        checkpoint_callback = ModelCheckpoint(
            save_top_k=10,
            monitor="train_loss",
            mode="min",
            filename=f"ecg-lstm-{data_tag}-window={w}-{{epoch:02d}}-{{train_loss:.2f}}",
            save_last=True,
            every_n_epochs=5
        )
        # NOTE(review): early stopping watches the training loss because no
        # validation loader is passed to fit().
        early_stopping_callback = EarlyStopping(
            monitor="train_loss", min_delta=0.01, mode="min", patience=2)

        # Each feature row holds `w` samples plus `w - 1` annotation values.
        input_size = w*2-1
        model = ECGClassifierModel(
            input_size=input_size, hidden_size=hidden_size, num_layers=1, num_classes=4)
        # 'auto' selects an available accelerator and falls back to CPU, so
        # the notebook also runs on machines without a GPU.
        trainer = pl.Trainer(max_epochs=max_epochs, accelerator='auto', devices=1,
                             log_every_n_steps=10, callbacks=[checkpoint_callback, early_stopping_callback])
        trainer.fit(model, train_dl)
        trainer.test(model, dataloaders=test_dl)


In [16]:
# Train and evaluate one model per window size on the un-normalised data.
create_and_fit(
    window=[5, 10, 15],
    batch_size=20,
    normalize=False,
    hidden_size=32,
)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type             | Params
----------------------------------------------
0 | lstm     | LSTM             | 11.0 K
1 | fc1      | Linear           | 260   
2 | loss     | CrossEntropyLoss | 0     
3 | accuracy | Accuracy         | 0     
----------------------------------------------
11.3 K    Trainable params
0         Non-trainable params
11.3 K    Total params
0.045     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9515876770019531
        test_loss           0.13021698594093323
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type             | Params
----------------------------------------------
0 | lstm     | LSTM             | 13.6 K
1 | fc1      | Linear           | 260   
2 | loss     | CrossEntropyLoss | 0     
3 | accuracy | Accuracy         | 0     
----------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy          0.966446042060852
        test_loss           0.09857232868671417
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type             | Params
----------------------------------------------
0 | lstm     | LSTM             | 16.1 K
1 | fc1      | Linear           | 260   
2 | loss     | CrossEntropyLoss | 0     
3 | accuracy | Accuracy         | 0     
----------------------------------------------
16.4 K    Trainable params
0         Non-trainable params
16.4 K    Total params
0.066     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9593364000320435
        test_loss           0.11901750415563583
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
