In [34]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import scipy.signal as scisig
import scipy.stats
import cvxEDA

In [35]:
fs_dict = {'ACC': 32, 'BVP': 64, 'EDA': 4, 'TEMP': 4, 'label': 700, 'Resp': 700}
WINDOW_IN_SECONDS = 60
label_dict = {'baseline': 1, 'stress': 2, 'amusement': 0}
int_to_label = {1: 'baseline', 2: 'stress', 0: 'amusement'}
feat_names = None
DATA_PATH = r'C:\Users\IALAB\Downloads\WESAD-master\data\WESAD/'
SAVE_PATH = r'C:\Users\IALAB\Downloads\WESAD-master\data_sync/'


In [36]:
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

In [None]:
def eda_stats(y):
    Fs = fs_dict['EDA']
    yn = (y - y.mean()) / y.std()
    print(yn)
    [r, p, t, l, d, e, obj] = cvxEDA.cvxEDA(yn, 1. / Fs)

    return [r, p, t, l, d, e, obj]

In [38]:
class SubjectData:

    def __init__(self, main_path, subject_number):
        self.name = f'S{subject_number}'
        self.subject_keys = ['signal', 'label', 'subject']
        self.signal_keys = ['chest', 'wrist']
        self.chest_keys = ['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
        self.wrist_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        with open(os.path.join(main_path, self.name) + '/' + self.name + '.pkl', 'rb') as file:
            self.data = pickle.load(file, encoding='latin1')
        self.labels = self.data['label']

    def get_wrist_data(self):
        data = self.data['signal']['wrist']
        data.update({'Resp': self.data['signal']['chest']['Resp']})
        return data

    def get_chest_data(self):
        return self.data['signal']['chest']

    def extract_features(self):  # only wrist
        results = \
            {
                key: get_statistics(self.get_wrist_data()[key].flatten(), self.labels, key)
                for key in self.wrist_keys
            }
        return results

In [None]:
def butter_lowpass(cutoff, fs, order=5):
    # Filtering Helper functions
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = scisig.butter(order, normal_cutoff, btype='low', analog=False)
    return b, a


def butter_lowpass_filter(data, cutoff, fs, order=5):
    # Filtering Helper functions
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = scisig.lfilter(b, a, data)
    return y
    

def get_slope(series):
    linreg = scipy.stats.linregress(np.arange(len(series)), series )
    slope = linreg[0]
    return slope


def get_window_stats(data, label=-1):
    mean_features = np.mean(data)
    std_features = np.std(data)
    min_features = np.amin(data)
    max_features = np.amax(data)

    features = {'mean': mean_features, 'std': std_features, 'min': min_features, 'max': max_features,
                'label': label}
    return features


def get_net_accel(data):
    return (data['ACC_x'] ** 2 + data['ACC_y'] ** 2 + data['ACC_z'] ** 2).apply(lambda x: np.sqrt(x))


def get_peak_freq(x):
    f, Pxx = scisig.periodogram(x, fs=8)
    psd_dict = {amp: freq for amp, freq in zip(Pxx, f)}
    peak_freq = psd_dict[max(psd_dict.keys())]
    return peak_freq


# https://github.com/MITMediaLabAffectiveComputing/eda-explorer/blob/master/AccelerometerFeatureExtractionScript.py
def filterSignalFIR(eda, cutoff=0.4, numtaps=64):
    f = cutoff / (fs_dict['ACC'] / 2.0)
    FIR_coeff = scisig.firwin(numtaps, f)

    return scisig.lfilter(FIR_coeff, 1, eda.flatten())

In [40]:
def compute_features(e4_data_dict, labels, norm_type=None):
    # Dataframes for each sensor type
    eda_df = pd.DataFrame(e4_data_dict['EDA'], columns=['EDA'])
    bvp_df = pd.DataFrame(e4_data_dict['BVP'], columns=['BVP'])
    #acc_df = pd.DataFrame(e4_data_dict['ACC'], columns=['ACC_x', 'ACC_y', 'ACC_z'])
    temp_df = pd.DataFrame(e4_data_dict['TEMP'], columns=['TEMP'])
    label_df = pd.DataFrame(labels, columns=['label'])
    resp_df = pd.DataFrame(e4_data_dict['Resp'], columns=['Resp'])

    # Filter EDA
    eda_df['EDA'] = butter_lowpass_filter(eda_df['EDA'], 1.0, fs_dict['EDA'], 6)

    # Filter ACM
    #for col in acc_df.columns:
        #acc_df[col] = filterSignalFIR(acc_df.values)

    # Adding indices for combination due to differing sampling frequencies
    eda_df.index = np.arange(0, len(eda_df) * (1 / fs_dict['EDA']), (1 / fs_dict['EDA']))
    bvp_df.index = np.arange(0, len(bvp_df) * (1 / fs_dict['BVP']), (1 / fs_dict['BVP']))
    #acc_df.index = np.arange(0, len(acc_df) * (1 / fs_dict['ACC']), (1 / fs_dict['ACC']))
    temp_df.index = np.arange(0, len(temp_df) * (1 / fs_dict['TEMP']), (1 / fs_dict['TEMP']))
    label_df.index = np.arange(0, len(label_df) * (1 / fs_dict['label']), (1 / fs_dict['label']))
    resp_df.index = np.arange(0, len(resp_df) * (1 / fs_dict['Resp']), (1 / fs_dict['Resp']))

    # Change indices to datetime
    eda_df.index = pd.to_datetime(eda_df.index, unit='s')
    bvp_df.index = pd.to_datetime(bvp_df.index, unit='s')
    temp_df.index = pd.to_datetime(temp_df.index, unit='s')
    #acc_df.index = pd.to_datetime(acc_df.index, unit='s')
    label_df.index = pd.to_datetime(label_df.index, unit='s')
    resp_df.index = pd.to_datetime(resp_df.index, unit='s')
    print('wa')
    # New EDA features
    r, p, t, l, d, e, obj = eda_stats(eda_df['EDA'])
    eda_df['EDA_phasic'] = r
    eda_df['EDA_smna'] = p
    eda_df['EDA_tonic'] = t
        
    # Combined dataframe - not used yet
    df = eda_df.join(bvp_df, how='outer')
    df = df.join(temp_df, how='outer')
    #df = df.join(acc_df, how='outer')
    df = df.join(resp_df, how='outer')
    df = df.join(label_df, how='outer')
    df['label'] = df['label'].fillna(method='bfill')
    df.reset_index(drop=True, inplace=True)

    if norm_type == 'std':
        # std norm
        df = (df - df.mean()) / df.std()
    elif norm_type == 'minmax':
        # minmax norm
        df = (df - df.min()) / (df.max() - df.min())

    # Groupby
    grouped = df.groupby('label')
    baseline = grouped.get_group(1)
    stress = grouped.get_group(2)
    amusement = grouped.get_group(3)
    print('fin1')
    return grouped, baseline, stress, amusement

In [41]:
def get_samples(data, label, stride_seconds=0.25):
    global feat_names
    global WINDOW_IN_SECONDS

    samples = []
    
    # Convertir tiempo a muestras
    window_len = int(fs_dict['label'] * WINDOW_IN_SECONDS)  # Asegurar que sea entero
    stride_len = int(fs_dict['label'] * stride_seconds)  # Convertir stride a entero

    # Crear ventanas solapadas
    for start in range(0, len(data) - window_len, stride_len):
        # Extraer ventana
        w = data[start:start + window_len]

        # Calcular estadísticas
        wstats = get_window_stats(w, label=label)

        if not wstats:  # Si get_window_stats() no devuelve nada, continuar
            print(f"⚠️ Advertencia: Ventana vacía en índice {start}, saltando...")
            continue

        # Formatear en DataFrame
        # Convertir a DataFrame
        x = pd.DataFrame(wstats).drop('label', axis=0)
        y = label  # ✅ Asignamos el label directamente en lugar de extraerlo de 'wstats'
        x.drop('label', axis=1, inplace=True)

        if feat_names is None:
            feat_names = ['{}_{}'.format(row, col) for row in x.index for col in x.columns]

        # **Aquí nos aseguramos de que 'wdf' existe antes de usarlo**
        wdf = pd.DataFrame(x.values.flatten()).T
        wdf.columns = feat_names

        # Verificar si feat_names y wdf tienen la misma cantidad de columnas
        if len(feat_names) != wdf.shape[1]:
            print(f"⚠️ Advertencia: Ajustando feat_names ({len(feat_names)}) a {wdf.shape[1]}")
            feat_names = feat_names[:wdf.shape[1]]  # Ajustamos para evitar error

        wdf = pd.concat([wdf, pd.DataFrame({'label': y}, index=[0])], axis=1)

        # Extraer más características
        wdf['BVP_peak_freq'] = get_peak_freq(w['BVP'].dropna())
        wdf['TEMP_slope'] = get_slope(w['TEMP'].dropna())

        # Guardar ventana
        samples.append(wdf)

    if not samples:
        print("⚠️ Advertencia: No se generaron muestras en get_samples(), devolviendo DataFrame vacío.")
        return pd.DataFrame()  # Retornar DataFrame vacío en caso de error

    return pd.concat(samples)



In [42]:
def make_patient_data(subject_id):
    global SAVE_PATH
    global WINDOW_IN_SECONDS

    # Make subject data object for Sx
    subject = SubjectData(main_path=r'C:\Users\IALAB\Downloads\WESAD-master\data\WESAD', subject_number=subject_id)

    # Empatica E4 data - now with resp
    e4_data_dict = subject.get_wrist_data()

    # norm type
    norm_type = None

    # The 3 classes we are classifying
    grouped, baseline, stress, amusement = compute_features(e4_data_dict, subject.labels, norm_type)

    # print(f'Available windows for {subject.name}:')
    n_baseline_wdws = int(len(baseline) / (fs_dict['label'] * WINDOW_IN_SECONDS))
    n_stress_wdws = int(len(stress) / (fs_dict['label'] * WINDOW_IN_SECONDS))
    n_amusement_wdws = int(len(amusement) / (fs_dict['label'] * WINDOW_IN_SECONDS))
    # print(f'Baseline: {n_baseline_wdws}\nStress: {n_stress_wdws}\nAmusement: {n_amusement_wdws}\n')
    print(f"Procesando S{subject_id}:")
    print(f"  - Baseline windows: {n_baseline_wdws}")
    print(f"  - Stress windows: {n_stress_wdws}")
    print(f"  - Amusement windows: {n_amusement_wdws}")
    #
    baseline_samples = get_samples(baseline, label=0, stride_seconds = 0.25)
    # Downsampling
    # baseline_samples = baseline_samples[::2]
    stress_samples = get_samples(stress, label=1, stride_seconds = 0.25)
    amusement_samples = get_samples(amusement, label=2, stride_seconds = 0.25)

    all_samples = pd.concat([baseline_samples, stress_samples, amusement_samples])
    all_samples['label'] = all_samples['label'].astype(int)
    all_samples = pd.concat([all_samples.drop('label', axis=1), pd.get_dummies(all_samples['label'])], axis=1)
    # Selected Features
    # all_samples = all_samples[['EDA_mean', 'EDA_std', 'EDA_min', 'EDA_max',
    #                          'BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max',
    #                        'TEMP_mean', 'TEMP_std', 'TEMP_min', 'TEMP_max',
    #                        'net_acc_mean', 'net_acc_std', 'net_acc_min', 'net_acc_max',
    #                        0, 1, 2]]
    # Save file as csv (for now)
    all_samples.to_csv(f'{SAVE_PATH}/S{subject_id}_feats_4.csv')

    # Does this save any space?
    subject = None

In [43]:
def combine_files(subjects):
    df_list = []
    for s in subjects:
        df = pd.read_csv(f'{SAVE_PATH}/S{s}_feats_4.csv', index_col=0)
        df['subject'] = s
        df_list.append(df)

    df = pd.concat(df_list)

    print(df.head(10))
    print(df.columns)

    df['label'] = df[['0', '1', '2']].apply(lambda x: x.values.argmax(), axis=1)
    df.drop(['0', '1', '2'], axis=1, inplace=True)

    df.reset_index(drop=True, inplace=True)

    df.to_csv(f'{SAVE_PATH}/may14_feats4.csv')

    counts = df['label'].value_counts()
    print('Number of samples per class:')
    for label, number in zip(counts.index, counts.values):
        print(f'{int_to_label[label]}: {number}')

In [44]:
subject_ids = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

for patient in subject_ids:
    print(f'Processing data for S{patient}...')
    make_patient_data(patient)


Processing data for S2...
wa
1970-01-01 00:00:00.000   -1.087837
1970-01-01 00:00:00.250   -0.475059
1970-01-01 00:00:00.500    0.963029
1970-01-01 00:00:00.750    2.456080
1970-01-01 00:00:01.000    2.687014
                             ...   
1970-01-01 01:41:17.750   -1.001519
1970-01-01 01:41:18.000   -0.998040
1970-01-01 01:41:18.250   -0.997933
1970-01-01 01:41:18.500   -1.002368
1970-01-01 01:41:18.750   -1.004089
Name: EDA, Length: 24316, dtype: float64
     pcost       dcost       gap    pres   dres
 0: -1.2090e+04 -1.2038e+04  5e+04  2e+02  2e-01
 1: -1.2090e+04 -2.0215e+04  1e+04  4e+01  5e-02
 2: -1.2098e+04 -1.6237e+04  4e+03  1e+01  2e-02
 3: -1.2098e+04 -1.5682e+04  4e+03  1e+01  1e-02
 4: -1.2102e+04 -1.3289e+04  1e+03  3e+00  4e-03
 5: -1.2099e+04 -1.2800e+04  7e+02  1e+00  2e-03
 6: -1.2095e+04 -1.2543e+04  4e+02  8e-01  9e-04
 7: -1.2089e+04 -1.2333e+04  2e+02  3e-01  4e-04
 8: -1.2102e+04 -1.2158e+04  6e+01  2e-02  3e-05
 9: -1.2126e+04 -1.2148e+04  2e+01  6e-03  7e

KeyboardInterrupt: 

In [None]:
combine_files(subject_ids)
print('Processing complete.')

   EDA_mean   EDA_std  EDA_min  EDA_max  EDA_phasic_mean  EDA_phasic_std  \
0  0.659074  0.141481  -358.13   554.77         0.659074        1.089131   
0  0.719731  0.141070  -358.13   554.77         0.719731        1.068293   
0  0.699985  0.141171  -358.13   554.77         0.699985        1.049086   
0  0.721821  0.142468  -358.13   554.77         0.721821        1.050560   
0  0.732099  0.141502  -358.13   554.77         0.732099        1.038914   
0  0.751310  0.139631  -358.13   554.77         0.751310        1.033578   
0  0.765666  0.137753  -358.13   554.77         0.765666        1.031534   
0  0.883424  0.136294  -358.13   554.77         0.883424        1.030761   
0  0.788890  0.135943  -392.28   554.77         0.788890        1.030042   
0  0.689091  0.135503  -392.28   554.77         0.689091        1.021168   

   EDA_phasic_min  EDA_phasic_max  EDA_smna_mean  EDA_smna_std  ...  \
0         -358.13          554.77       0.659074      1.952141  ...   
0         -358.13    