In [1]:
import pandas as pd
import numpy as np

In [2]:
from glob import glob
from pathlib import Path

In [3]:
from joblib import Parallel, delayed

In [4]:
# Input feature tables, one CSV per recording. glob() yields matches in
# arbitrary order, so sort them to keep the processing order reproducible.
files = sorted(glob('../data/ecg_features_60s_clean_twa_rqa_60s/*.csv'))
# Alternative input set, kept for reference:
# files = sorted(glob('../data/ecg_preprocessed_cleaned/*.csv'))

In [5]:
# Feature columns copied from every input CSV into the model dataset; the
# order here is the column order of the written files.
# NOTE(review): "rmssd"/"RMSSD" and "pnn50"/"pNN50" differ only by case --
# presumably they come from two different feature extractors; confirm both
# are intended.
X_labels = [
    "hrv_mean", "hrv_min", "hrv_max", "hrv_std", "hrv_rms",
    "hr_mean", "hr_min", "hr_max",
    "rr_mean", "rr_min", "rr_max", "rr_std",
    "nn50", "pnn50", "rmssd",
    "lf", "hf", "vhf", "uhf", "tp",
    "lp_hf", "lp_vhf", "lp_uhf", "hf_normalized",
    "w", "wmax", "wen",
    "MeanNN", "SDNN",
    "SDANN1", "SDNNI1", "SDANN2", "SDNNI2", "SDANN5", "SDNNI5",
    "RMSSD", "SDSD", "CVNN", "CVSD",
    "MedianNN", "MadNN", "MCVNN", "IQRNN", "SDRMSSD",
    "Prc20NN", "Prc80NN", "pNN50", "pNN20",
    "MinNN", "MaxNN", "HTI", "TINN",
    "twa",
]

# Name of the class column written after the feature columns.
y_label = 'target'

In [6]:
def to_model_dataset(
    output_dir,
    baseline=0,
    mental_stress=-1,
    high_physical_activity=-1,
    moderate_physical_activity=-1,
    low_physical_activity=-1,
    *,
    feature_columns=None,
    target_column=None,
):
    """Build a per-file converter that writes a labelled feature CSV.

    Each activity group is assigned the integer class passed for it; a group
    whose value is ``-1`` is excluded, and rows whose ``label`` belongs to no
    included group are dropped.

    Args:
        output_dir: Directory the converted CSVs are written to. It is created
            (including parents) if it does not exist yet.
        baseline: Class for the sitting / recovery labels (``-1`` to exclude).
        mental_stress: Class for the mental-stress task labels.
        high_physical_activity: Class for the treadmill / cycling / stairs /
            fast-walking labels.
        moderate_physical_activity: Class for the own-pace walking / dishes /
            vacuum labels.
        low_physical_activity: Class for the standing / lying labels.
        feature_columns: Columns to keep, in output order. Defaults to the
            module-level ``X_labels``.
        target_column: Name of the class column. Defaults to the module-level
            ``y_label``.

    Returns:
        A function ``inner(file)`` that reads ``file`` (a CSV with a ``label``
        column and the feature columns), writes the filtered table to
        ``output_dir`` under the same file name, and returns ``None``.
    """
    excluded = -1
    label_groups = (
        (baseline,
         ('Sitting', 'Recov1', 'Recov2', 'Recov3', 'Recov4', 'Recov5', 'Recov6')),
        (mental_stress,
         ('TA', 'SSST_Sing_countdown', 'Pasat', 'Raven', 'TA_repeat', 'Pasat_repeat')),
        (high_physical_activity,
         ('Treadmill1', 'Treadmill2', 'Treadmill3', 'Treadmill4',
          'Walking_fast_pace', 'Cycling', 'stairs_up_and_down')),
        (moderate_physical_activity,
         ('Walking_own_pace', 'Dishes', 'Vacuum')),
        (low_physical_activity,
         ('Standing', 'Lying_supine', 'Recov_standing')),
    )
    # One label -> class lookup, built once per converter rather than per file.
    # Groups marked as excluded contribute no entries, so their rows are dropped.
    label_to_target = {
        label: value
        for value, labels in label_groups
        if value != excluded
        for label in labels
    }

    features = list(X_labels if feature_columns is None else feature_columns)
    target = y_label if target_column is None else target_column

    # DataFrame.to_csv does not create missing directories.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    def inner(file):
        df = pd.read_csv(file)

        # Keep only rows whose label is mapped to a class, then attach the class.
        kept = df[df['label'].isin(list(label_to_target))].copy()
        kept[target] = kept['label'].map(label_to_target)

        # Missing feature values are written as 0.
        kept = kept[features + [target]].fillna(0)

        kept.to_csv(out_dir / Path(file).name, index=False)

    return inner


In [7]:
# output_dir = f'../data/ecg_better_model_physical_activity'
# for file in files:
#     to_model_dataset(output_dir, mental_stress=1)(file)

In [8]:
# Mental-stress dataset: baseline labels -> 0 (the function default),
# mental-stress labels -> 1; rows with any other label are dropped.
output_dir = '../data/nn_features_mental_stress'
# DataFrame.to_csv does not create missing directories.
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Build the converter once instead of once per file.
convert_file = to_model_dataset(output_dir, mental_stress=1)
Parallel(n_jobs=4)(delayed(convert_file)(file) for file in files)

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [9]:
# Physical-activity dataset: baseline labels -> 0 (the function default),
# high-physical-activity labels -> 1; rows with any other label are dropped.
output_dir = '../data/nn_features_physical_activity'
# DataFrame.to_csv does not create missing directories.
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Build the converter once instead of once per file.
convert_file = to_model_dataset(output_dir, high_physical_activity=1)
Parallel(n_jobs=4)(delayed(convert_file)(file) for file in files)

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [10]:
# Comparison dataset: baseline rows are excluded (-1), mental-stress labels -> 0,
# high-physical-activity labels -> 1; rows with any other label are dropped.
output_dir = '../data/nn_features_comparison'
# DataFrame.to_csv does not create missing directories.
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Build the converter once instead of once per file.
convert_file = to_model_dataset(
    output_dir, baseline=-1, mental_stress=0, high_physical_activity=1
)
Parallel(n_jobs=4)(delayed(convert_file)(file) for file in files)

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]