In [6]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import scipy.signal as scisig
import scipy.stats
import cvxEDA

In [8]:
fs_dict = {'ACC': 700, 'ECG': 700, 'EMG': 700, 'EDA': 700, 'Temp': 700, 'Resp': 700, 'label': 700}
WINDOW_IN_SECONDS = 30
STRIDE_IN_SECONDS = 0.5
label_dict = {'baseline': 1, 'stress': 2, 'amusement': 3}
int_to_label = {1: 'baseline', 2: 'stress', 3: 'amusement'}
feat_names = None
DATA_PATH = r'C:\Users\IALAB\Downloads\WESAD-master\data\WESAD/'
SAVE_PATH = r'C:\Users\IALAB\Downloads\WESAD-master\data_sync_30_05/'


In [None]:
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

In [None]:
#def eda_stats(y):
#    Fs = fs_dict['EDA']
#    yn = (y - y.mean()) / y.std()
#    print(yn)
#    print("calculating eda stats")
#    [r, p, t, l, d, e, obj] = cvxEDA.cvxEDA(yn, 1. / Fs)
#    return [r, p, t, l, d, e, obj]

In [None]:
def eda_stats(y, ventana_segundos):
    Fs = fs_dict['EDA']  # Frecuencia de muestreo (700 Hz)
    ventana_muestras = ventana_segundos * Fs  # Convertir segundos a muestras

    # Inicializar listas vacías para almacenar los resultados completos
    eda_phasic = []
    eda_smna = []
    eda_tonic = []
    scr_counts = []  # Para contar los SCR (respuestas de la piel)

    print("Procesando EDA en segmentos...")

    for inicio in range(0, len(y), ventana_muestras):  
        fin = min(inicio + ventana_muestras, len(y))
        y_segmento = y[inicio:fin]

        # Normalizar el segmento
        yn = (y_segmento - y_segmento.mean()) / y_segmento.std()

        if len(yn) < 2:  # Evitar procesar segmentos vacíos
            continue  

        # Aplicar cvxEDA
        r, p, t, _, _, _, _ = cvxEDA.cvxEDA(yn, 1. / Fs)

        # Concatenar los resultados en las listas
        eda_phasic.extend(r)
        eda_smna.extend(p)
        eda_tonic.extend(t)
        
        # Contar SCR como cambios positivos en la señal fásica
        scr_counts.append(np.sum(np.diff(p) > 0.01))  

    print("✅ Procesamiento de EDA completado")
    
    # Convertir a arrays de numpy antes de retornar
    return np.array(eda_phasic), np.array(eda_smna), np.array(eda_tonic), np.array(scr_counts)


In [None]:
class SubjectData:

    def __init__(self, main_path, subject_number):
        self.name = f'S{subject_number}'
        self.subject_keys = ['signal', 'label', 'subject']
        self.signal_keys = ['chest', 'wrist']
        self.chest_keys = ['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
        self.wrist_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        with open(os.path.join(main_path, self.name) + '/' + self.name + '.pkl', 'rb') as file:
            self.data = pickle.load(file, encoding='latin1')
        self.labels = self.data['label']

    def get_wrist_data(self):
        data = self.data['signal']['wrist']
        data.update({'Resp': self.data['signal']['chest']['Resp']})
        return data

    def get_chest_data(self):
        return self.data['signal']['chest']

    def extract_features(self):  # only wrist
        results = \
            {
                key: get_statistics(self.get_chest_data()[key].flatten(), self.labels, key)
                for key in self.chest_keys
            }
        return results

In [None]:
def butter_lowpass(cutoff, fs, order=5):
    # Filtering Helper functions
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = scisig.butter(order, normal_cutoff, btype='low', analog=False)
    return b, a


def butter_lowpass_filter(data, cutoff, fs, order=5):
    # Filtering Helper functions
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = scisig.lfilter(b, a, data)
    return y

def get_slope(series):
    linreg = scipy.stats.linregress(np.arange(len(series)), series )
    slope = linreg[0]
    return slope

def get_peak_freq(x, fs):
    f, Pxx = scisig.periodogram(x, fs=fs)
    psd_dict = {amp: freq for amp, freq in zip(Pxx, f)}
    return psd_dict[max(psd_dict.keys())]

def get_window_stats(data, label=-1):
    mean_features = np.mean(data)
    std_features = np.std(data)
    min_features = np.amin(data)
    max_features = np.amax(data)

    features = {'mean': mean_features, 'std': std_features, 'min': min_features, 'max': max_features,
                'label': label}
    return features


def get_net_accel(data):
    return (data['ACC_x'] ** 2 + data['ACC_y'] ** 2 + data['ACC_z'] ** 2).apply(lambda x: np.sqrt(x))


def get_peak_freq(x):
    f, Pxx = scisig.periodogram(x, fs=8)
    psd_dict = {amp: freq for amp, freq in zip(Pxx, f)}
    peak_freq = psd_dict[max(psd_dict.keys())]
    return peak_freq


# https://github.com/MITMediaLabAffectiveComputing/eda-explorer/blob/master/AccelerometerFeatureExtractionScript.py
def filterSignalFIR(eda, cutoff=0.4, numtaps=64):
    f = cutoff / (fs_dict['ACC'] / 2.0)
    FIR_coeff = scisig.firwin(numtaps, f)

    return scisig.lfilter(FIR_coeff, 1, eda.flatten())

In [None]:
def compute_features(data_dict, labels, fs_dict, norm_type=None):
    ecg_df = pd.DataFrame(data_dict['ECG'], columns=['ECG'])
    eda_df = pd.DataFrame(data_dict['EDA'], columns=['EDA'])
    emg_df = pd.DataFrame(data_dict['EMG'], columns=['EMG'])
    temp_df = pd.DataFrame(data_dict['Temp'], columns=['Temp'])
    resp_df = pd.DataFrame(data_dict['Resp'], columns=['Resp'])
    label_df = pd.DataFrame(labels, columns=['label'])
    print(fs_dict.keys())
    
    eda_df['EDA'] = butter_lowpass_filter(eda_df['EDA'], 5.0, fs_dict['EDA'], 6)  # Filtro 5 Hz
    resp_df['Resp'] = butter_lowpass_filter(resp_df['Resp'], 0.35, fs_dict['Resp'], 4)  # 0.1 - 0.35 Hz
    
    # Calcular estadísticas de EDA
    r, p, t, num_scr = eda_stats(eda_df['EDA'].values.flatten(), WINDOW_IN_SECONDS)
    eda_df['EDA_phasic'] = r
    print(eda_df['EDA_phasic'].shape)
    eda_df['EDA_smna'] = p
    print(eda_df['EDA_smna'].shape)
    eda_df['EDA_tonic'] = t
    print(eda_df['EDA_tonic'].shape)
    #eda_df['SCR_count'] = num_scr
    
    # Calcular frecuencia cardíaca
    hr_values = 60 / np.diff(np.where(ecg_df['ECG'] > np.mean(ecg_df['ECG']))[0])

    # Interpolar para que coincida con el tamaño de ecg_df
    ecg_df['HR'] = np.interp(np.arange(len(ecg_df)), np.arange(len(hr_values)), hr_values)
    print(ecg_df['HR'].shape)

    # Aplicar el filtro después
    ecg_df['HR'] = butter_lowpass_filter(ecg_df['HR'], 2.0, fs_dict['ECG'], 4)
    
    # Calcular pendiente de temperatura
    temp_df['Temp_slope'] = get_slope(temp_df['Temp'])
    print(temp_df['Temp_slope'].shape)
    
    # Calcular tasa de respiración
    resp_rate = np.sum(np.diff(resp_df['Resp']) > 0) / (len(resp_df) / fs_dict['Resp'])
    resp_df['Resp_rate'] = resp_rate
    print(resp_df['Resp_rate'].shape)
    
    # Unir los DataFrames
    df = eda_df.join(ecg_df, how='outer')
    df = df.join(emg_df, how='outer')
    df = df.join(temp_df, how='outer')
    df = df.join(resp_df, how='outer')
    df = df.join(label_df, how='outer')
    df['label'] = df['label'].fillna(method='bfill')
    df.reset_index(drop=True, inplace=True)
    
    # Normalización
    if norm_type == 'std':
        df = (df - df.mean()) / df.std()
    elif norm_type == 'minmax':
        df = (df - df.min()) / (df.max() - df.min())
    
    # Agrupar por etiqueta
    grouped = df.groupby('label')
    print(grouped)

    baseline = grouped.get_group(1)
    print(baseline)
    stress = grouped.get_group(2)
    print(stress)
    amusement = grouped.get_group(3)
    print(amusement)

    return grouped, baseline, stress, amusement

In [None]:
def get_samples(data, label, stride_seconds):
    global feat_names
    global WINDOW_IN_SECONDS

    samples = []
    
    # Convertir tiempo a muestras
    window_len = int(fs_dict['label'] * WINDOW_IN_SECONDS)  # Asegurar que sea entero
    stride_len = int(fs_dict['label'] * stride_seconds)  # Convertir stride a entero

    print("El numero de datos esperados es de: " + str(((len(data) - window_len) / stride_len) + 1))

    # Crear ventanas solapadas
    for start in range(0, len(data) - window_len + 1, stride_len):

        print(f"Ventana desde {start} hasta {start + window_len} (Índice {start})..........................")
        # Extraer ventana
        w = data[start:start + window_len]

        # Calcular estadísticas
        wstats = get_window_stats(w, label=label)

        if not wstats:  # Si get_window_stats() no devuelve nada, continuar
            print(f"⚠️ Advertencia: Ventana vacía en índice {start}, saltando...")
            continue

        # Formatear en DataFrame
        x = pd.DataFrame(wstats).drop('label', axis=0)
        y = label  
        x.drop('label', axis=1, inplace=True)

        if feat_names is None:
            feat_names = ['{}_{}'.format(row, col) for row in x.index for col in x.columns]

        # **Asegurar que 'wdf' existe antes de usarlo**
        wdf = pd.DataFrame(x.values.flatten()).T
        wdf.columns = feat_names

        # Verificar si feat_names y wdf tienen la misma cantidad de columnas
        if len(feat_names) != wdf.shape[1]:
            print(f"⚠️ Advertencia: Ajustando feat_names ({len(feat_names)}) a {wdf.shape[1]}")
            feat_names = feat_names[:wdf.shape[1]]  # Ajustamos para evitar error

        wdf = pd.concat([wdf, pd.DataFrame({'label': y}, index=[0])], axis=1)

        # Extraer más características SOLO si existen en la ventana
        if 'TEMP' in w:
            wdf['TEMP_slope'] = get_slope(w['TEMP'].dropna())

        # Guardar ventana
        samples.append(wdf)

    if not samples:
        print("⚠️ Advertencia: No se generaron muestras en get_samples(), devolviendo DataFrame vacío.")
        return pd.DataFrame()  # Retornar DataFrame vacío en caso de error

    return pd.concat(samples)




In [None]:
def make_patient_data(subject_id):
    global SAVE_PATH
    global WINDOW_IN_SECONDS

    # Make subject data object for Sx
    subject = SubjectData(main_path=r'C:\Users\IALAB\Downloads\WESAD-master\data\WESAD', subject_number=subject_id)

    # Empatica E4 data - now with resp
    data_dict = subject.get_chest_data()

    print(data_dict.keys())
    print(subject.labels)

    # norm type
    norm_type = None

    # The 3 classes we are classifying

    grouped, baseline, stress, amusement = compute_features(data_dict, subject.labels, fs_dict=fs_dict , norm_type=norm_type)
    print("features calculated")
    # print(f'Available windows for {subject.name}:')
    samples_per_window = int(fs_dict['label'] * WINDOW_IN_SECONDS)
    stride_per_window = int(fs_dict['label'] * STRIDE_IN_SECONDS)

    n_baseline_wdws = (len(baseline) - samples_per_window) / stride_per_window + 1
    n_stress_wdws = (len(stress) - samples_per_window) / stride_per_window + 1
    n_amusement_wdws = (len(amusement) - samples_per_window) / stride_per_window + 1
    # print(f'Baseline: {n_baseline_wdws}\nStress: {n_stress_wdws}\nAmusement: {n_amusement_wdws}\n')
    print(f"Procesando S{subject_id}:")
    print(f"  - Baseline windows: {n_baseline_wdws}")
    print(f"  - Stress windows: {n_stress_wdws}")
    print(f"  - Amusement windows: {n_amusement_wdws}")
    #
    baseline_samples = get_samples(baseline, label=1, stride_seconds = STRIDE_IN_SECONDS)
    # Downsampling
    # baseline_samples = baseline_samples[::2]
    stress_samples = get_samples(stress, label=2, stride_seconds = STRIDE_IN_SECONDS)
    amusement_samples = get_samples(amusement, label=3, stride_seconds = STRIDE_IN_SECONDS)

    all_samples = pd.concat([baseline_samples, stress_samples, amusement_samples])
    all_samples['label'] = all_samples['label'].astype(int)
    all_samples = pd.concat([all_samples.drop('label', axis=1), pd.get_dummies(all_samples['label'])], axis=1)
    # Selected Features
    # all_samples = all_samples[['EDA_mean', 'EDA_std', 'EDA_min', 'EDA_max',
    #                          'BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max',
    #                        'TEMP_mean', 'TEMP_std', 'TEMP_min', 'TEMP_max',
    #                        'net_acc_mean', 'net_acc_std', 'net_acc_min', 'net_acc_max',
    #                        0, 1, 2]]
    # Save file as csv (for now)
    all_samples.to_csv(f'{SAVE_PATH}/S{subject_id}_feats_4.csv')

    # Does this save any space?
    subject = None

In [9]:
def combine_files(subjects):
    df_list = []
    for s in subjects:
        df = pd.read_csv(f'{SAVE_PATH}/S{s}_feats_4.csv', index_col=0)
        df['subject'] = s
        df_list.append(df)

    df = pd.concat(df_list)

    print(df.head(10))
    print(df.columns)

    df['label'] = df[['1', '2', '3']].idxmax(axis=1).astype(int)
    df.drop(['1', '2', '3'], axis=1, inplace=True)

    df.reset_index(drop=True, inplace=True)

    df.to_csv(f'{SAVE_PATH}/may14_feats4.csv')

    counts = df['label'].value_counts()

    print("Índices en counts:", counts.index.tolist())
    print("Claves en int_to_label:", int_to_label.keys())

    print('Number of samples per class:')
    for label, number in zip(counts.index, counts.values):
        print(f'{int_to_label[label]}: {number}')

In [None]:
subject_ids = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

for patient in subject_ids:
    print(f'Processing data for S{patient}...')
    make_patient_data(patient)


In [10]:
subject_ids = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

combine_files(subject_ids)
print('Processing complete.')

    EDA_mean   EDA_std   EDA_min     EDA_max  EDA_phasic_mean  EDA_phasic_std  \
0  37.554971  0.182977 -6.214904  489.071196        37.554971        0.436898   
0  37.554011  0.173115 -6.214904  489.071196        37.554011        0.387536   
0  37.547265  0.166094 -6.796271  489.071196        37.547265        0.350867   
0  37.536517  0.161628 -6.839681  489.071196        37.536517        0.327099   
0  37.531246  0.157913 -6.839681  489.071196        37.531246        0.308344   
0  37.532148  0.154735 -6.839681  489.071196        37.532148        0.293787   
0  37.532966  0.152905 -6.839681  489.071196        37.532966        0.284846   
0  37.531443  0.152449 -6.839681  489.071196        37.531443        0.279007   
0  37.529940  0.151842 -6.839681  489.071196        37.529940        0.275925   
0  37.525450  0.150702 -6.839681  489.071196        37.525450        0.275287   

   EDA_phasic_min  EDA_phasic_max  EDA_smna_mean  EDA_smna_std  ...  Resp_min  \
0       -6.214904      489.