In [11]:
import numpy as np 
import pandas as pd

In [2]:
from sia import Pipeline
from sia.io import Metadata, read_edf, read_csv, write_csv
from sia.preprocessors import neurokit

In [3]:
# Stage 1: raw EDF recordings -> neurokit preprocessing -> one CSV per recording.
# The metadata file is matched to recordings via the r'[0-9]{5}' pattern
# (presumably the 5-digit participant id in the file name -- confirm in sia docs).
# The recorded run of this cell took roughly three hours for 127 files.
(
    Pipeline()
    .data(
        read_edf(
            './data/ecg_raw/*.edf',
            Metadata('./data/ecg_raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}'),
        )
    )
    .preprocess(neurokit())
    .to(write_csv('./data/test/[0-9]{5}.csv'))
)

Finalizing... ./data/ecg_raw\30235_LAB_Conditions_ECG.edf: : 127it [3:00:53, 85.46s/it]          


In [3]:
def reduce(data: np.ndarray):
    """
        Keep only the signal and its label, signal first.

        Maps rows of (timestamp, category, ecg) to rows of (ecg, category).
        The result is a new array; the input is left untouched.
    """
    ecg_column, category_column = 2, 1
    return data[:, [ecg_column, category_column]]

In [4]:
# Category names that count as the positive class; every other category is negative.
target_labels = ['TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat']

def encode_category(data: np.ndarray):
    """
        From (ecg, category) to (ecg, bool)

        Column 1 is overwritten with 1 where the category is one of
        ``target_labels`` and 0 everywhere else.

        The array is modified IN PLACE and the same object is returned, so the
        caller's array no longer holds the original category names afterwards.
    """
    # Compute the membership mask exactly once, before anything is written:
    # it is the expensive part on long recordings, and deriving both writes
    # from the same mask keeps them independent of the column's mutated state.
    is_target = np.isin(data[:, 1], target_labels)
    data[~is_target, 1] = 0
    data[is_target, 1] = 1
    return data

In [6]:
Pipeline() \
    .data(read_csv('./data/test/*.csv')) \
    .reduce(reduce) \
    .postprocess(encode_category) \
    .to(write_csv('./data/test_2/[0-9]{5}.csv'))

Finalizing... ./data/test\30235.csv: : 127it [26:03, 12.31s/it]          


In [12]:
from datasets import Dataset as HFDataset, load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [50]:
from glob import glob 

import torch

class Dataset(torch.utils.data.Dataset):
    """
        Sliding-window dataset over a table with 'signal' and 'label' columns.

        Item ``idx`` is the window of ``window`` consecutive rows starting at
        row ``idx``: ``x`` is the window's 'signal' values and ``y`` is 1 when
        the most frequent 'label' value in the window equals 1, otherwise 0.

        ``data`` must support ``len()`` and slicing that returns a mapping with
        'signal' and 'label' tensors.
    """

    def __init__(self, data: HFDataset, window: int = 1000):
        if window <= 0:
            raise ValueError(f"window must be a positive integer, got {window}")

        self.data = data
        self.window = window

    def __len__(self):
        # Only start positions that still leave room for a full window are
        # valid items; reporting len(self.data) would make samplers request
        # indices that cannot be served.
        return max(0, len(self.data) - self.window + 1)

    def __getitem__(self, idx):
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            # IndexError is the sequence-protocol signal for "out of range".
            # StopIteration here would silently end DataLoader iteration and
            # drop the batch that was being assembled.
            raise IndexError(f"window index {idx} out of range for {n_windows} windows")

        window = self.data[idx: idx + self.window]

        x = window['signal']
        # Majority vote over the window. The result stays on whatever device
        # the label tensor lives on, instead of a hard-coded 'cuda'.
        y = (torch.mode(window['label'], 0)[0] == 1).long()

        return x, y

In [45]:
from sklearn.model_selection import train_test_split

# One CSV per participant; the split is done at file level so that a
# participant's data never ends up in both train and test.
# NOTE(review): this reads ./data/smoll, not the ./data/test_2 directory the
# earlier pipeline cell writes to -- confirm which directory is intended.
# glob() returns files in arbitrary, OS-dependent order, so sort before
# splitting; together with a fixed random_state this makes the split
# reproducible across kernel restarts and machines.
participants = sorted(glob("./data/smoll/*.csv"))
train_participants, test_participants = train_test_split(
    participants, test_size=0.2, random_state=42
)

In [54]:
# Load only the first participant file of each split (a small subset for quick
# iteration), name the two CSV columns, and have items returned as torch
# tensors placed on the GPU.
data = load_dataset(
    'csv',
    data_files=dict(
        train=train_participants[:1],
        test=test_participants[:1],
    ),
    column_names=['signal', 'label'],
    num_proc=8,
).with_format('torch', device='cuda')

In [51]:
# Wrap each split in the sliding-window Dataset (default window length).
train_dataset, test_dataset = (
    Dataset(data[split]) for split in ('train', 'test')
)

In [52]:
from torch.utils.data import DataLoader

# Batch both datasets identically: 1024 windows per batch, default sampling
# order and no worker processes.
train_dataloader, test_dataloader = map(
    lambda dataset: DataLoader(dataset, batch_size=1024),
    (train_dataset, test_dataset),
)

In [49]:
# Benchmark: time one full pass over the training DataLoader (data loading only, no model).
%timeit for x in train_dataloader: pass

16.6 s ± 105 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [53]:
# NOTE(review): identical benchmark to the earlier %timeit cell; presumably re-run
# after changing the dataset code -- confirm which timing is current and keep one cell.
%timeit for x in train_dataloader: pass

24.6 s ± 97.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [102]:
import lightning as L

import wandb

# Open a Weights & Biases run in the "stress-in-action" project and record the
# hyperparameters of this experiment alongside it.
# NOTE(review): "architecture" is logged as "CNN" while the model defined below
# is built from Linear/Tanh layers; learning_rate and epochs are also repeated
# as literals in the model and Trainer cells -- confirm they stay in sync.
wandb.init(
    project="stress-in-action",
    config=dict(
        learning_rate=0.02,
        architecture="CNN",
        dataset="SiA",
        epochs=11,
    ),
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33malex-antonides[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [114]:
from lightning.pytorch.utilities.types import OptimizerLRScheduler
from torch import nn

class Test(L.LightningModule):
    """
        Small fully-connected binary classifier over a fixed-length signal window.

        Architecture: Linear(input_size, hidden_size) -> Tanh -> Linear(hidden_size, num_classes).
        The network outputs raw, unnormalized class scores (logits); no softmax
        is applied because ``cross_entropy`` performs the log-softmax itself.

        Args:
            input_size: number of samples per input window.
            hidden_size: width of the hidden layer.
            num_classes: number of output classes (2 for the 0/1 labels).
            learning_rate: Adam learning rate.
    """

    def __init__(
        self,
        input_size: int = 1000,
        hidden_size: int = 10,
        num_classes: int = 2,
        learning_rate: float = 0.02,
    ):
        super().__init__()

        self.learning_rate = learning_rate

        # One logit per class. The previous single output followed by
        # Softmax(-1) always evaluated to 1.0, so cross_entropy saw a
        # one-class problem: zero loss for label 0, and label 1 out of range.
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, num_classes),
        )
        # No explicit .cuda() here: device placement is left to the caller /
        # the Lightning Trainer so the module can also be built on CPU.

    def forward(self, x):
        """Return the class logits for a batch of windows, shape (batch, num_classes)."""
        return self.layers(x)

    def configure_optimizers(self) -> OptimizerLRScheduler:
        """Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def _batch_loss(self, batch):
        """Cross-entropy between the predicted logits and the integer class labels of one batch."""
        x, y = batch
        return nn.functional.cross_entropy(self(x), y)

    def training_step(self, batch, batch_idx):
        """Compute, log (as "loss") and return the training loss for one batch."""
        loss = self._batch_loss(batch)
        wandb.log({"loss": loss})
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log (as "val_loss") and return the validation loss for one batch."""
        loss = self._batch_loss(batch)
        wandb.log({"val_loss": loss})
        return loss

# Build the model on the GPU and train it for 11 epochs with the simple
# profiler enabled. The test DataLoader is passed as the validation data.
model = Test().cuda()
trainer = L.Trainer(
    max_epochs=11,
    accelerator="gpu",
    devices="auto",
    strategy="auto",
    profiler="simple",
)
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=test_dataloader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params
--------------------------------------
0 | layers | Sequential | 10.0 K
--------------------------------------
10.0 K    Trainable params
0         Non-trainable params
10.0 K    Total params
0.040     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

                                                                           

c:\Users\Admin\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Epoch 0:   0%|          | 184/21619862 [00:11<372:29:50, 16.12it/s, v_num=87]

c:\Users\Admin\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [53]:
# Close the Weights & Biases run opened by wandb.init(...) earlier in the notebook.
wandb.finish()