In [1]:
import numpy as np 

In [2]:
from sia import Pipeline
from sia.io import Metadata, read_edf, read_csv, read_dataset, write_csv, write_png, write_dataset
from sia.preprocessors import neurokit
from sia.transformers import as_window, scaleogram

In [3]:
def encode_category(target):
    """Build a processor that binarises labels by membership in ``target``.

    Args:
        target: Value or collection of values that should map to 1.

    Returns:
        A callable taking ``label`` and returning ``{'label': ...}`` where the
        value is ``np.isin(label, target)`` cast to ``int`` (1 when the label
        is in ``target``, 0 otherwise).
    """
    def inner(label):
        # Boolean membership mask first, then cast so the output is 0/1.
        is_target = np.isin(label, target)
        encoded = is_target.astype(int)
        return dict(label=encoded)

    return inner

In [4]:
def resize(width: int, height: int):
    """Build a processor that converts images to RGB and resizes them.

    Args:
        width: Target width passed to each image's ``resize``.
        height: Target height passed to each image's ``resize``.

    Returns:
        A callable taking an iterable ``image`` and returning
        ``{'pixel_values': [...]}`` with one converted and resized entry per
        input element, in the same order.
    """
    target_size = (width, height)

    def inner(image):
        # Elements are presumably PIL images (they must expose .convert and
        # .resize) -- confirm against the dataset's image feature.
        resized = []
        for picture in image:
            rgb_picture = picture.convert("RGB")
            resized.append(rgb_picture.resize(target_size))
        return {'pixel_values': resized}

    return inner

In [5]:
# Read the raw EDF recordings together with the timestamp metadata file and
# write them out as CSV with no processing step in between.
# NOTE(review): the r'[0-9]{5}' pattern presumably ties metadata rows and
# output file names to a 5-digit id in each file name -- confirm in sia docs.
(
    Pipeline()
    .data(
        read_edf(
            './data/ecg_raw/*.edf',
            Metadata('./data/ecg_raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}'),
        )
    )
    .to(write_csv('./data/ecg_unprocessed/[0-9]{5}.csv'))
)

Creating CSV from Arrow format: 100%|██████████| 8692/8692 [00:25<00:00, 340.69ba/s]
Creating CSV from Arrow format: 100%|██████████| 8344/8344 [00:24<00:00, 334.25ba/s]
Creating CSV from Arrow format: 100%|██████████| 8174/8174 [00:23<00:00, 347.94ba/s]
Creating CSV from Arrow format: 100%|██████████| 9684/9684 [00:28<00:00, 340.86ba/s]
Creating CSV from Arrow format: 100%|██████████| 7842/7842 [00:22<00:00, 341.19ba/s]
Creating CSV from Arrow format: 100%|██████████| 7729/7729 [00:22<00:00, 344.27ba/s]
Creating CSV from Arrow format: 100%|██████████| 7737/7737 [00:22<00:00, 341.13ba/s]
Creating CSV from Arrow format: 100%|██████████| 9241/9241 [00:27<00:00, 341.76ba/s]
Creating CSV from Arrow format: 100%|██████████| 8718/8718 [00:25<00:00, 342.51ba/s]
Creating CSV from Arrow format: 100%|██████████| 10272/10272 [00:30<00:00, 341.46ba/s]
Creating CSV from Arrow format: 100%|██████████| 8212/8212 [00:24<00:00, 339.92ba/s]
Creating CSV from Arrow format: 100%|██████████| 7871/7871 [00:

In [None]:
# Same EDF + metadata source as the unprocessed export, but run through the
# neurokit() preprocessor before being written to ./data/ecg_preprocessed/.
(
    Pipeline()
    .data(
        read_edf(
            './data/ecg_raw/*.edf',
            Metadata('./data/ecg_raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}'),
        )
    )
    .process(neurokit())
    .to(write_csv('./data/ecg_preprocessed/[0-9]{5}.csv'))
)

In [5]:
# Load the 'ECG_Clean' and 'category' columns from the preprocessed CSVs,
# rename them to 'signal' / 'label', and write the result as CSV.
(
    Pipeline()
    .data(read_csv('./data/ecg_preprocessed/*.csv', ['ECG_Clean', 'category']))
    .rename({'ECG_Clean': 'signal', 'category': 'label'})
    .to(write_csv('./data/ecg_model_with_features/[0-9]{5}.csv'))
)

Generating train split: 8692000 examples [00:15, 555522.88 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8692/8692 [00:15<00:00, 568.14ba/s]
Generating train split: 8344000 examples [00:14, 565807.32 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8344/8344 [00:14<00:00, 581.78ba/s]
Generating train split: 8174000 examples [00:15, 535978.88 examples/s]
Creating CSV from Arrow format: 100%|██████████| 8174/8174 [00:14<00:00, 569.86ba/s]
Generating train split: 9684000 examples [00:18, 518489.69 examples/s]
Creating CSV from Arrow format: 100%|██████████| 9684/9684 [00:16<00:00, 574.73ba/s]
Generating train split: 7842000 examples [00:14, 529285.42 examples/s]
Creating CSV from Arrow format: 100%|██████████| 7842/7842 [00:13<00:00, 563.19ba/s]
Generating train split: 7729000 examples [00:15, 509793.63 examples/s]
Creating CSV from Arrow format: 100%|██████████| 7729/7729 [00:13<00:00, 553.39ba/s]
Generating train split: 7737000 examples [00:14, 521745.65 examp

In [None]:
# NOTE(review): this cell is disabled (fully commented out) and does not run.
# As written it would binarise 'label' with encode_category over the listed
# categories and write to ./data/ecg_model/. Either re-enable it or delete it.
# Pipeline() \
#     .data(read_csv('./data/ecg_preprocessed/*.csv', ['ECG_Clean', 'category'])) \
#     .rename({'ECG_Clean': 'signal', 'category': 'label'}) \
#     .process(encode_category(['TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat'])) \
#     .to(write_csv('./data/ecg_model/[0-9]{5}.csv'))

In [None]:
# NOTE(review): this cell is disabled (fully commented out) and does not run.
# As written it would binarise 'label', apply as_window(1000), drop the
# 'signal' column and write to ./data/ecg_windowed_model/. The meaning of
# 1000 (presumably a window length in samples) is not shown here -- confirm.
# Pipeline() \
#     .data(read_csv('./data/ecg_preprocessed/*.csv', ['ECG_Clean', 'category'])) \
#     .rename({'ECG_Clean': 'signal', 'category': 'label'}) \
#     .process(encode_category(['TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat'])) \
#     .process(as_window(1000)) \
#     .drop('signal') \
#     .to(write_csv('./data/ecg_windowed_model/[0-9]{5}.csv'))

In [None]:
# Load the cleaned signal and its category from the preprocessed CSVs, apply
# the scaleogram transformer, and write the output as PNG files.
# NOTE(review): 'morl' is presumably the Morlet wavelet name -- confirm
# against the sia.transformers.scaleogram documentation.
(
    Pipeline()
    .data(read_csv('./data/ecg_preprocessed/*.csv', ['ECG_Clean', 'category']))
    .rename({'ECG_Clean': 'signal', 'category': 'label'})
    .process(scaleogram('morl'))
    .to(write_png('./data/scaleograms/[0-9]{5}/'))
)

In [None]:
# Read the scaleogram datasets back, add 127x127 RGB 'pixel_values' via
# resize(), drop the original 'image' column, and store the result as a
# dataset under ./data/scaleograms_resized/.
(
    Pipeline()
    .data(read_dataset('./data/scaleograms/**/'))
    .process(resize(127, 127))
    .drop('image')
    .to(write_dataset('./data/scaleograms_resized/[0-9]{5}/'))
)