In [1]:
import numpy as np 

In [2]:
from sia import Pipeline
from sia.io import Metadata, read_edf, read_csv, read_dataset, write_csv, write_png, write_dataset
from sia.preprocessors import neurokit
from sia.transformers import as_window, scaleogram

In [3]:
def encode_category(target):
    """Create a mapper that turns labels into a 0/1 membership flag.

    Args:
        target: Value or collection of values to test membership against;
            forwarded as the second argument of ``np.isin``.

    Returns:
        A callable ``inner(label)`` returning a dict with the single key
        ``'label'``, whose value is an integer NumPy array holding 1 where
        the corresponding entry of ``label`` is found in ``target`` and 0
        otherwise.
    """
    def inner(label):
        # NOTE(review): the parameter name `label` is kept unchanged on purpose;
        # presumably the pipeline matches arguments to columns by name — confirm.
        is_member = np.isin(label, target)
        encoded = is_member.astype(int)
        return {'label': encoded}

    return inner

In [4]:
def resize(width: int, height: int):
    """Create a mapper that converts images to RGB and resizes them.

    Args:
        width: Target width passed to each image's ``resize`` call.
        height: Target height passed to each image's ``resize`` call.

    Returns:
        A callable ``inner(image)`` that iterates over ``image``, calls
        ``.convert("RGB")`` followed by ``.resize((width, height))`` on every
        element, and returns the results as a list under the key
        ``'pixel_values'``.
    """
    size = (width, height)

    def inner(image):
        # NOTE(review): elements are assumed to be PIL-style images exposing
        # `convert` and `resize` — confirm against the pipeline that feeds this.
        resized = []
        for picture in image:
            resized.append(picture.convert("RGB").resize(size))
        return {'pixel_values': resized}

    return inner

In [5]:
# Preprocessing cell: read the raw EDF files, run the neurokit preprocessor,
# filter on ECG_Quality and write the result out as CSV.
(
    Pipeline()
    .data(
        read_edf(
            './data/ecg_raw/*.edf',
            # Metadata is built from the merged timestamp file and keyed via
            # `on_regex` with a five-digit pattern.
            Metadata('./data/ecg_raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}'),
        )
    )
    .process(neurokit())
    # Keep entries whose quality value is strictly greater than 0.25.
    # NOTE(review): the lambda's parameter name `ECG_Quality` presumably selects
    # the column of the same name — confirm before renaming it.
    .filter(lambda ECG_Quality: [quality > 0.25 for quality in ECG_Quality])
    .to(write_csv('./data/ecg_preprocessed_peaks/[0-9]{5}.csv'))
)

Map (num_proc=8):   0%|          | 0/8692000 [00:00<?, ? examples/s]

Filter (num_proc=8):   0%|          | 0/8692000 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/8671 [00:00<?, ?ba/s]

Map (num_proc=8):   0%|          | 0/8344000 [00:00<?, ? examples/s]

KeyboardInterrupt: 

In [5]:
# Export cell: load the `ECG_Clean` and `category` columns from the preprocessed
# CSV files, rename them to `signal` / `label`, and write them back out as CSV.
# NOTE(review): this reads from ./data/ecg_preprocessed/ whereas the preprocessing
# cell in this notebook writes to ./data/ecg_preprocessed_peaks/ — confirm that the
# input directory is the intended one.
(
    Pipeline()
    .data(read_csv('./data/ecg_preprocessed/*.csv', ['ECG_Clean', 'category']))
    .rename({'ECG_Clean': 'signal', 'category': 'label'})
    .to(write_csv('./data/ecg_model_with_features/[0-9]{5}.csv'))
)

Generating train split: 8692000 examples [00:15, 555522.88 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8692/8692 [00:15<00:00, 568.14ba/s]
Generating train split: 8344000 examples [00:14, 565807.32 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8344/8344 [00:14<00:00, 581.78ba/s]
Generating train split: 8174000 examples [00:15, 535978.88 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8174/8174 [00:14<00:00, 569.86ba/s]
Generating train split: 9684000 examples [00:18, 518489.69 examples/s]
Creating CSV from Arrow format: 100%|██████████| 9684/9684 [00:16<00:00, 574.73ba/s]
Generating train split: 7842000 examples [00:14, 529285.42 examples/s]
Creating CSV from Arrow format: 100%|██████████| 7842/7842 [00:13<00:00, 563.19ba/s]
Generating train split: 7729000 examples [00:15, 509793.63 examples/s]
Creating CSV from Arrow format: 100%|██████████| 7729/7729 [00:13<00:00, 553.39ba/s]
Generating train split: 7737000 examples [00:14, 521745.65 examp