In [1]:
import numpy as np 

In [2]:
from sia import Pipeline
from sia.io import Metadata, read_edf, read_csv, write_csv
from sia.preprocessors import neurokit

In [3]:
def reduce(data: np.ndarray):
    """
    Keep only the signal and label columns, in swapped order.

    Picks column 2 followed by column 1 of every row, i.e. turns
    (timestamp, category, ecg) rows into (ecg, category) rows.
    The timestamp column (index 0) is dropped.

    Because a list of column indices is used, NumPy returns a new array
    rather than a view of ``data``.
    """
    ecg_column, category_column = 2, 1
    return data[:, [ecg_column, category_column]]

In [4]:
# Category names that are encoded as the positive class (1) by encode_category.
target_labels = ['TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat']

def encode_category(data: np.ndarray, labels=None):
    """
    Binarise the category column: from (ecg, category) to (ecg, 0/1).

    Every row whose category (column 1) is one of ``labels`` gets 1 in that
    column; every other row gets 0. Column 0 is left untouched.

    Parameters
    ----------
    data : np.ndarray
        2-D array whose column 1 holds the category names.
        NOTE: the array is modified in place (and also returned).
    labels : iterable of str, optional
        Category names to encode as 1. Defaults to the module-level
        ``target_labels``, looked up at call time.

    Returns
    -------
    np.ndarray
        The same ``data`` object, with column 1 overwritten by 0/1
        (stored in whatever form the array's dtype gives those values).
    """
    wanted = list(target_labels if labels is None else labels)

    # Compute the membership mask exactly once, *before* writing anything,
    # so the result never depends on values this function has just stored.
    is_target = np.isin(data[:, 1], wanted)

    data[is_target, 1] = 1
    data[~is_target, 1] = 0
    return data

In [None]:
# Stage 1: raw EDF recordings -> neurokit preprocessing -> one CSV per recording.
# NOTE(review): the r'[0-9]{5}' pattern passed to on_regex, and the same pattern
# inside the write_csv path, are presumably used by `sia` to match a 5-digit
# recording/participant id between files and metadata -- confirm against the
# `sia` documentation.
(
    Pipeline()
    .data(
        read_edf(
            './data/ecg_raw/*.edf',
            Metadata('./data/ecg_raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}'),
        )
    )
    .preprocess(neurokit())
    .to(write_csv('./data/ecg_preprocessed/[0-9]{5}.csv'))
)

In [None]:
# Stage 2: preprocessed CSVs -> (ecg, category) columns -> 0/1 labels -> model CSVs.
# `reduce` selects/reorders the columns and `encode_category` binarises the
# category column; both are defined in earlier cells of this notebook.
# NOTE(review): the '[0-9]{5}' in the output path is presumably a filename
# template interpreted by `sia`'s write_csv -- confirm.
(
    Pipeline()
    .data(read_csv('./data/ecg_preprocessed/*.csv'))
    .reduce(reduce)
    .postprocess(encode_category)
    .to(write_csv('./data/ecg_model/[0-9]{5}.csv'))
)