# Etapa 2 - Paso 2: Extracción de features multi-sujeto 

```mermaid
    graph LR
        A[1 <br> Detecta carpetas de sujetos automáticamente]-->B[2 <br> Carga de señales por sujeto]
        B-->C[3 <br> Ventaneo]
        C-->D[4 <br> Asignar etiqueta a la ventana - moda]
        D-->E[5 <br> Extraer features y almacenar si la etiqueta es 1, 2 o 3]
        E-->F[6 <br> Construir DataFrame]
        F-->G[7 <br> Guardar en un CSV]
```

In [1]:
import os
import re
import pickle
import numpy as np
import pandas as pd
from scipy.stats import mode


# Root folder of the WESAD dataset. The WESAD_PATH environment variable, when
# set, overrides the default below so the notebook can run on other machines
# without editing this cell; with the variable unset behavior is unchanged.
PATH_BASE = os.environ.get(
    "WESAD_PATH",
    r"C:/Users/nat27/Desktop/Desktop/Proyectos/CienciaDatos/wesad-stress-classifier/data/WESAD",
)

EXCLUDE = {"S1", "S12"}  # subjects to discard
FS_TARGET = 4  # Hz, common sampling rate after resampling
WINDOW_SEC = 60  # window length, in seconds
STEP_SEC = 30  # hop between consecutive window starts, in seconds

# Helper: extract the features of a single window (step 5)
def extract_features(temp_win, eda_win, bvp_win):
    """Build the flat feature vector for one window.

    For each of the three windows, in the order TEMP, EDA, BVP, six
    statistics are computed: mean, standard deviation, maximum, minimum,
    range (max - min) and the mean of the first difference.

    Parameters
    ----------
    temp_win, eda_win, bvp_win : array-like
        Samples of each signal inside the window.

    Returns
    -------
    list
        18 values: the six statistics of ``temp_win``, then those of
        ``eda_win``, then those of ``bvp_win``.
    """
    def _describe(window):
        average = np.mean(window)
        spread = np.std(window)
        highest = np.max(window)
        lowest = np.min(window)
        # Mean first difference, i.e. the average sample-to-sample change.
        slope = np.mean(np.diff(window))
        return [average, spread, highest, lowest, highest - lowest, slope]

    row = []
    for window in (temp_win, eda_win, bvp_win):
        row.extend(_describe(window))
    return row



In [2]:
# 1) Detect subject folders automatically.
# A folder qualifies when its name looks like "S<number>", it is not listed in
# EXCLUDE, and it contains a pickle named after the folder (e.g. S2/S2.pkl).
candidates = os.listdir(PATH_BASE) if os.path.isdir(PATH_BASE) else []

subject_dirs = sorted(
    (
        entry
        for entry in candidates
        if re.fullmatch(r"S\d+", entry)
        and entry not in EXCLUDE
        and os.path.isfile(os.path.join(PATH_BASE, entry, f"{entry}.pkl"))
    ),
    # Sort by the numeric part so that S10 comes after S9, not after S1.
    key=lambda subject_id: int(subject_id[1:]),
)

if len(subject_dirs) == 0:
    raise FileNotFoundError("No se encontraron sujetos válidos en la carpeta especificada.")

print(f"Sujetos detectados: {subject_dirs}")

Sujetos detectados: ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S13', 'S14', 'S15', 'S16', 'S17']


In [None]:
# 2) Load the signals (per subject), window them and collect one feature row
#    per retained window.

# Sampling rates of the streams that get decimated, taken from the original
# "700Hz → 4Hz" / "64Hz → 4Hz" notes. TEMP and EDA are used without
# resampling, so they are presumed to already be at FS_TARGET — TODO confirm
# against the dataset documentation.
LABEL_FS = 700
BVP_FS = 64
label_stride = int(LABEL_FS / FS_TARGET)
bvp_stride = int(BVP_FS / FS_TARGET)

# 3) Windowing parameters in samples (loop-invariant, so computed once).
window_size = FS_TARGET * WINDOW_SEC
step_size = FS_TARGET * STEP_SEC

all_rows = []
for subj in subject_dirs:
    pkl_path = os.path.join(PATH_BASE, subj, f"{subj}.pkl")
    # NOTE(security): unpickling can execute arbitrary code embedded in the
    # file; only point PATH_BASE at a trusted copy of the dataset.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f, encoding='latin1')

    labels = data['label']
    wrist = data['signal']['wrist']
    temp_signal = wrist['TEMP'].flatten()
    eda_signal = wrist['EDA'].flatten()
    bvp_signal = wrist['BVP'].flatten()

    # Downsample labels to FS_TARGET by keeping every label_stride-th sample.
    resampled_labels = labels[::label_stride]

    # Downsample BVP to FS_TARGET by plain sample-dropping.
    # NOTE(review): no anti-aliasing filter is applied before decimation;
    # left as-is because filtering would change every BVP feature.
    bvp_down = bvp_signal[::bvp_stride]

    # Trim all streams to a common length so window indices line up.
    min_len = min(len(temp_signal), len(eda_signal), len(bvp_down), len(resampled_labels))
    temp_signal = temp_signal[:min_len]
    eda_signal = eda_signal[:min_len]
    bvp_down = bvp_down[:min_len]
    resampled_labels = resampled_labels[:min_len]

    # The "+ 1" keeps the last full window: without it a window ending exactly
    # at the final sample (start == min_len - window_size) was silently
    # dropped, and a signal exactly one window long produced no windows.
    for start in range(0, min_len - window_size + 1, step_size):
        end = start + window_size

        temp_win = temp_signal[start:end]
        eda_win = eda_signal[start:end]
        bvp_win = bvp_down[start:end]
        label_win = resampled_labels[start:end]

        # 4) Assign the window its most frequent label (mode).
        lbl = int(mode(label_win, keepdims=True).mode[0])

        # 5) Extract features and keep the window only if its label is 1, 2 or 3.
        if lbl in (1, 2, 3):
            # Six statistics for each signal (TEMP, EDA, BVP).
            feats = extract_features(temp_win, eda_win, bvp_win)
            all_rows.append([subj, lbl] + feats)

# 6) Build the DataFrame and show a summary.
# Column order must match the order in which extract_features emits values.
columns = ["subject", "label"] + [
    "temp_mean", "temp_std", "temp_max", "temp_min", "temp_range", "temp_deriv",
    "eda_mean", "eda_std", "eda_max", "eda_min", "eda_range", "eda_deriv",
    "bvp_mean", "bvp_std", "bvp_max", "bvp_min", "bvp_range", "bvp_deriv"
]

df = pd.DataFrame(all_rows, columns=columns)

print("Shape final:", df.shape)
print(df.groupby(["subject", "label"]).size())

Shape final: (1105, 20)
subject  label
S10      1        39
         2        24
         3        12
S11      1        40
         2        23
         3        13
S13      1        40
         2        22
         3        13
S14      1        40
         2        22
         3        13
S15      1        39
         2        23
         3        13
S16      1        39
         2        22
         3        12
S17      1        39
         2        24
         3        12
S2       1        38
         2        21
         3        12
S3       1        38
         2        22
         3        13
S4       1        39
         2        21
         3        12
S5       1        40
         2        21
         3        12
S6       1        39
         2        22
         3        12
S7       1        39
         2        21
         3        13
S8       1        39
         2        23
         3        12
S9       1        39
         2        21
         3        12
dtype: int64


In [4]:
# 7) Persist the feature table as CSV (row index omitted), written relative
#    to the current working directory.
out_csv = "features_raw.csv"
df.to_csv(path_or_buf=out_csv, index=False)
print(f"Archivo guardado: {out_csv}")

Archivo guardado: features_raw.csv
