In [None]:
# %pip install -r requirements.txt

## Preprocessing
The preprocessing pipelines are defined in this notebook. Two pipelines are used throughout the project: one that cleans the data so that other features can be extracted from it, and one that extracts the `signal` and its label (the encoded `category` column) for a neural network.

In [None]:
from sia import Preprocessing
from sia.io import Metadata, read_edf, read_csv, write_csv
from sia.preprocessors import neurokit

In [None]:
from sia.encoders import GroupEncoder
from sklearn.preprocessing import LabelEncoder

### Cleaning Pipeline
The pipeline defined below uses NeuroKit's default preprocessor to clean the data. In addition, samples with low signal quality (an `ECG_Quality` of 0.25 or lower) are discarded.

In [None]:
# Cleaning pipeline: read the raw EDF recordings, group the activity labels
# into five coarse categories, clean the signal with NeuroKit, drop
# low-quality samples and write the result to ./data/cleaned/.

# Samples must have an ECG_Quality strictly above this value to be kept.
MIN_ECG_QUALITY = .25

(
    Preprocessing()
    .data(
        read_edf(
            './data/raw/*.edf',
            # NOTE(review): the five-digit regex presumably extracts the
            # participant id used to match recordings to their timestamps
            # — confirm against sia.io.Metadata.
            Metadata('./data/raw/TimeStamps_Merged.txt').on_regex(r'[0-9]{5}')
        )
    )
    # Free the `category` name so the grouped value can be stored under it.
    .rename({'category': 'label'})
    # Collapse the individual activity names into five coarse categories.
    .encode({'label': 'category'}, GroupEncoder({
        'baseline': ['Sitting', 'Recov1', 'Recov2', 'Recov3', 'Recov4', 'Recov5', 'Recov6'],
        'mental_stress': ['TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat'],
        'high_physical_activity': ['Treadmill1', 'Treadmill2', 'Treadmill3', 'Treadmill4', 'Walking_fast_pace', 'Cycling', 'stairs_up_and_down'],
        'moderate_physical_activity': ['Walking_own_pace', 'Dishes', 'Vacuum'],
        'low_physical_activity': ['Standing', 'Lying_supine', 'Recov_standing']
    }))
    # Keep only samples that have a category (presumably activities outside
    # the groups above come out as None — confirm against GroupEncoder).
    # NOTE(review): the lambda parameter names (`category`, `ECG_Quality`)
    # appear to select the column that is passed in; do not rename them
    # without checking sia.Preprocessing.filter.
    .filter(lambda category: [value is not None for value in category])
    .process(neurokit())
    # Discard samples whose quality does not exceed the threshold.
    .filter(lambda ECG_Quality: [quality > MIN_ECG_QUALITY for quality in ECG_Quality])
    .to(write_csv('./data/cleaned/[0-9]{5}.csv'))
)

### Neural Network 
For the neural network, only the signal and the label are extracted and saved in another directory.

In [None]:
# Neural-network dataset: load only the cleaned ECG signal and its category
# from ./data/cleaned/, rename the signal column, encode the category with a
# LabelEncoder and write the result to ./data/signal/.
# NOTE(review): if the LabelEncoder is fitted separately per file, the
# integer codes may not be comparable between files — confirm against
# sia.Preprocessing.encode.
(
    Preprocessing()
    .data(
        read_csv(
            './data/cleaned/*.csv',
            columns=['ECG_Clean', 'category'],
        )
    )
    .rename({'ECG_Clean': 'signal'})
    .encode('category', LabelEncoder())
    .to(write_csv('./data/signal/[0-9]{5}.csv'))
)