In [1]:
import datasets
from datasets import load_dataset
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd

In [3]:
from pathlib import Path
from glob import glob

In [4]:
# Participant IDs are the file stems of the per-participant feature CSVs.
# glob() yields paths in arbitrary, filesystem-dependent order, so sort them
# to give the split below a stable input.
participants = sorted(Path(path).stem for path in glob('./data/wesad_features_20s/*.csv'))

# Split by participant (not by row): each participant ID ends up in exactly one
# of the two lists. The fixed seed keeps the split identical across kernel
# restarts, so files written for the train set never go stale on a re-run.
SPLIT_SEED = 42
train_participants, test_participants = train_test_split(
    participants, test_size=0.2, random_state=SPLIT_SEED
)

In [5]:
# Feature columns kept as model inputs, in the order they are written out.
X_labels = [
    'mean_hr',
    'std_hr',
    'HRV_TINN',
    'hrv_index',
    'nn50',
    'HRV_pNN50',
    'mean_hrv',
    'std_hrv',
    'rms_hrv',
    'mean_fourier_frequencies',
    'std_fourier_frequencies',
    'sum_psd',
]

# Name of the target column.
y_label = 'label'

In [6]:
# Build one binary-labelled (stress vs. non-stress) CSV per training participant.
#
# Input is read from the same directory the participant IDs were discovered in
# (the 20 s feature files); output goes to the matching "model" directory,
# which is created on demand so a fresh checkout does not fail on to_csv().
features_dir = Path('./data/wesad_features_20s')
model_dir = Path('./data/wesad_model_20s')
model_dir.mkdir(parents=True, exist_ok=True)

for participant in train_participants:
    df = pd.read_csv(features_dir / f'{participant}.csv', index_col=0)

    # Discard implausible rows: keep only rows whose mean_hr exceeds 2.
    # NOTE(review): the original note spoke of "HRV of 2s", yet the filter is
    # on mean_hr -- confirm the intended column and threshold.
    df = df[df['mean_hr'] > 2]

    # Balance towards 50% stress / 50% non-stress: keep every stress row and
    # take up to a third of that count from each of the three other classes.
    # head() returns fewer rows when a class is short, so the result can hold
    # fewer non-stress than stress rows.
    # NOTE(review): the code treats 0 as meditation, 1 as baseline, 2 as stress
    # and 3 as amusement -- confirm against the feature-extraction step.
    stress = df[df['label'] == 2]
    n_stress_samples = len(stress)
    n_per_non_stress = n_stress_samples // 3
    meditation = df[df['label'] == 0].head(n_per_non_stress)
    baseline = df[df['label'] == 1].head(n_per_non_stress)
    amusement = df[df['label'] == 3].head(n_per_non_stress)
    df = pd.concat([meditation, baseline, stress, amusement])

    # Collapse to a binary target: 1 = stress, 0 = everything else.
    df['label'] = df['label'].map({0: 0, 1: 0, 2: 1, 3: 0})

    # Replace missing feature values with 0 and keep only the model columns.
    df = df.fillna(0)
    df = df[X_labels + [y_label]]

    df.to_csv(model_dir / f'{participant}.csv', index=False)