# 02 — Signal Alignment & Windowing (WESAD)

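This notebook performs:
- Filtering of the wrist signals (low-pass for EDA, TEMP and ACC; band-pass for BVP)
- Resampling of every signal to a common frequency (4 Hz)
- Correct WESAD label downsampling (700 Hz → 4 Hz) by majority vote over each block of 175 labels
- Sliding window segmentation (60 samples = 15 s at 4 Hz, with 50% overlap)
- Saving aligned windows, one `.npz` file per subject, for deep learning models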


In [37]:
# =========================================================
# STEP 2 — MULTI-SUBJECT SIGNAL ALIGNMENT & WINDOWING
# =========================================================

import os
import pickle
import numpy as np
import scipy.signal as signal

# -------------------------------
# CONFIG
# -------------------------------
# Root directory that holds one sub-folder per subject (<DATA_ROOT>/S2/S2.pkl, ...).
DATA_ROOT = "D:/Dissertation/Data/WESAD"

# Candidate subject IDs S2..S17; subjects whose pickle is missing are skipped
# by the processing loop.
SUBJECTS = [f"S{subject_id}" for subject_id in range(2, 18)]

# Sampling rates: every stream is brought to TARGET_FS; the labels start at
# ORIGINAL_LABEL_FS.
ORIGINAL_LABEL_FS = 700
TARGET_FS = 4

# Label codes kept after downsampling; every other code is dropped.
# NOTE(review): presumably 1=baseline, 2=stress, 3=amusement per the WESAD
# readme — confirm.
VALID_LABELS = [1, 2, 3]

# Window geometry, in samples at TARGET_FS (60 samples at 4 Hz = 15 s).
WINDOW_SIZE = 60
OVERLAP = 30
# Hop between consecutive window starts (30 samples -> 50% overlap).
STEP = WINDOW_SIZE - OVERLAP

# -------------------------------
# FILTERS
# -------------------------------
def butter_lowpass(data, cutoff, fs, order=4):
    """Apply a zero-phase Butterworth low-pass filter to ``data``.

    Args:
        data: Samples to filter (passed straight to ``scipy.signal.filtfilt``).
        cutoff: Cut-off frequency, in the same unit as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth design (default 4).

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    nyquist = fs / 2
    # butter() expects the cut-off as a fraction of the Nyquist frequency.
    coefficients = signal.butter(order, cutoff / nyquist, btype="low")
    # Forward-backward filtering, so the output has no phase shift.
    return signal.filtfilt(*coefficients, data)

def butter_bandpass(data, lowcut, highcut, fs, order=4):
    """Apply a zero-phase Butterworth band-pass filter to ``data``.

    Args:
        data: Samples to filter (passed straight to ``scipy.signal.filtfilt``).
        lowcut: Lower band edge, in the same unit as ``fs``.
        highcut: Upper band edge, in the same unit as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order passed to the Butterworth design (default 4).

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    nyquist = fs / 2
    # Both band edges are expressed as fractions of the Nyquist frequency.
    band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = signal.butter(order, band, btype="band")
    # Forward-backward filtering, so the output has no phase shift.
    return signal.filtfilt(numerator, denominator, data)

# -------------------------------
# RESAMPLING
# -------------------------------
def resample_to(data, original_fs, target_fs):
    """Resample ``data`` from ``original_fs`` to ``target_fs``.

    The output length is ``floor(len(data) * target_fs / original_fs)``.
    It is computed by multiplying before dividing so that integer rates give
    an exact result; going through ``len(data) / original_fs`` first can lose
    a sample to floating-point rounding (e.g. ``int(29 / 100 * 100) == 28``).

    Args:
        data: Input samples; resampling is along the first axis.
        original_fs: Sampling frequency of ``data``.
        target_fs: Desired sampling frequency.

    Returns:
        The resampled signal produced by ``scipy.signal.resample``
        (Fourier-domain resampling).
    """
    target_len = int(len(data) * target_fs // original_fs)
    return signal.resample(data, target_len)

# -------------------------------
# LABEL DOWNSAMPLING (CRITICAL)
# -------------------------------
def downsample_labels(labels, original_fs, target_fs):
    """Downsample integer class labels by majority vote.

    The labels are cut into consecutive blocks of
    ``original_fs // target_fs`` samples, and each block is replaced by its
    most frequent value. Trailing samples that do not fill a whole block are
    dropped. On a tie the smallest label wins, because ``argmax`` returns the
    first maximum of the ``bincount`` histogram.

    Args:
        labels: Array-like of non-negative integer labels (``np.bincount``
            rejects negative values). Lists and column vectors are accepted
            and flattened.
        original_fs: Sampling frequency of ``labels``.
        target_fs: Desired label frequency; must not exceed ``original_fs``,
            otherwise the block size is 0 and a ``ZeroDivisionError`` is
            raised.

    Returns:
        1-D integer array with one label per block (empty, but still
        integer-typed, when the input holds less than one full block).
    """
    factor = original_fs // target_fs
    # asarray + ravel lets plain lists and (N, 1) arrays through unchanged.
    flat = np.asarray(labels).ravel()
    usable_len = (len(flat) // factor) * factor
    blocks = flat[:usable_len].reshape(-1, factor).astype(int)
    # Explicit dtype keeps the result integer even when there are no blocks.
    return np.array(
        [np.bincount(block).argmax() for block in blocks],
        dtype=np.intp,
    )

# -------------------------------
# WINDOWING
# -------------------------------
def create_windows(signal_data, labels, window_size, step):
    """Cut a signal and its labels into aligned, fixed-length windows.

    Windows start at ``0, step, 2 * step, ...``; a trailing stretch that
    cannot fill a whole window is discarded. The same start indices are used
    for ``signal_data`` and ``labels``, so row ``i`` of both outputs covers
    the same samples.

    Args:
        signal_data: Sequence/array of samples, sliced along its first axis.
        labels: Per-sample labels, expected to be at least as long as
            ``signal_data``.
        window_size: Number of samples per window (must be > 0).
        step: Hop between consecutive window starts (must be > 0).

    Returns:
        Tuple ``(X, y)``: ``X`` is float32 and ``y`` is int32, both with
        shape ``(n_windows, window_size, ...)``. When no window fits, the
        arrays are empty but keep the ``(0, window_size, ...)`` shape.

    Raises:
        ValueError: If ``window_size`` or ``step`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")
    if step <= 0:
        raise ValueError(f"step must be positive, got {step}")

    starts = range(0, len(signal_data) - window_size + 1, step)
    X = np.array(
        [signal_data[start:start + window_size] for start in starts],
        dtype=np.float32,
    )
    y = np.array(
        [labels[start:start + window_size] for start in starts],
        dtype=np.int32,
    )

    if len(starts) == 0:
        # np.array([]) has shape (0,); restore the window axis so callers can
        # stack or inspect shapes without special-casing short recordings.
        X = X.reshape((0, window_size) + np.shape(signal_data)[1:])
        y = y.reshape((0, window_size) + np.shape(labels)[1:])
    return X, y

# =========================================================
# PROCESS EACH SUBJECT
# =========================================================
# For every subject: load the pickle, filter and resample the wrist signals
# to TARGET_FS, downsample the labels, keep only VALID_LABELS, cut everything
# into windows and save one .npz file in the current working directory.
for subject in SUBJECTS:
    print(f"\n▶ Processing {subject}")

    pkl_path = os.path.join(DATA_ROOT, subject, f"{subject}.pkl")
    if not os.path.exists(pkl_path):
        print(f"⚠️ Missing {pkl_path}, skipping")
        continue

    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files obtained from a trusted source.
    with open(pkl_path, "rb") as f:
        data = pickle.load(f, encoding="latin1")

    signals = data["signal"]
    labels_700 = data["label"]

    wrist = signals["wrist"]

    # -------------------------------
    # RAW SIGNALS
    # -------------------------------
    EDA  = wrist["EDA"].flatten()        # 4 Hz
    BVP  = wrist["BVP"].flatten()        # 64 Hz
    TEMP = wrist["TEMP"].flatten()       # 4 Hz
    ACC  = wrist["ACC"][:, 0]            # 32 Hz (X-axis)

    # -------------------------------
    # FILTERING
    # -------------------------------
    EDA_f  = butter_lowpass(EDA, 1.0, 4)
    TEMP_f = butter_lowpass(TEMP, 0.5, 4)
    BVP_f  = butter_bandpass(BVP, 0.5, 8.0, 64)
    ACC_f  = butter_lowpass(ACC, 5.0, 32)

    # -------------------------------
    # RESAMPLING
    # -------------------------------
    EDA_r  = resample_to(EDA_f, 4, TARGET_FS)
    TEMP_r = resample_to(TEMP_f, 4, TARGET_FS)
    BVP_r  = resample_to(BVP_f, 64, TARGET_FS)
    ACC_r  = resample_to(ACC_f, 32, TARGET_FS)

    # -------------------------------
    # LABEL ALIGNMENT
    # -------------------------------
    labels_r = downsample_labels(labels_700, ORIGINAL_LABEL_FS, TARGET_FS)

    # -------------------------------
    # LENGTH ALIGNMENT
    # -------------------------------
    # Each stream is resampled independently, so the lengths can differ by a
    # sample or so. The boolean mask below needs identical lengths, so trim
    # everything to the shortest stream (a no-op when they already agree).
    stream_lengths = (
        len(EDA_r), len(BVP_r), len(ACC_r), len(TEMP_r), len(labels_r)
    )
    common_len = min(stream_lengths)
    if max(stream_lengths) != common_len:
        print(f"  Note: stream lengths {stream_lengths} trimmed to {common_len}")
    EDA_r, BVP_r = EDA_r[:common_len], BVP_r[:common_len]
    ACC_r, TEMP_r = ACC_r[:common_len], TEMP_r[:common_len]
    labels_r = labels_r[:common_len]

    # -------------------------------
    # FILTER CLASSES
    # -------------------------------
    # Keep only samples whose label is in VALID_LABELS. The kept samples are
    # concatenated, so a window may span the seam between two segments.
    mask = np.isin(labels_r, VALID_LABELS)
    EDA_r, BVP_r, ACC_r, TEMP_r = EDA_r[mask], BVP_r[mask], ACC_r[mask], TEMP_r[mask]
    labels_r = labels_r[mask]

    # -------------------------------
    # WINDOWING
    # -------------------------------
    EDA_w, label_w = create_windows(EDA_r, labels_r, WINDOW_SIZE, STEP)
    BVP_w, _ = create_windows(BVP_r, labels_r, WINDOW_SIZE, STEP)
    ACC_w, _ = create_windows(ACC_r, labels_r, WINDOW_SIZE, STEP)
    TEMP_w, _ = create_windows(TEMP_r, labels_r, WINDOW_SIZE, STEP)

    # One label per window: the most frequent per-sample label inside it.
    final_labels = np.array([np.bincount(w).argmax() for w in label_w])

    # -------------------------------
    # SAVE
    # -------------------------------
    out_file = f"aligned_windows_{subject}_w60.npz"
    np.savez(
        out_file,
        EDA_windows=EDA_w,
        BVP_windows=BVP_w,
        ACC_windows=ACC_w,
        TEMP_windows=TEMP_w,
        label_windows=final_labels
    )

    print(f"✓ Saved {out_file}")
    print("  Windows:", EDA_w.shape)
    print("  Labels:", np.unique(final_labels, return_counts=True))

print("\n✅ STEP-2 MULTI-SUBJECT PREPROCESSING COMPLETE")



▶ Processing S2
✓ Saved aligned_windows_S2_w60.npz
  Windows: (281, 60)
  Labels: (array([1, 2, 3]), array([152,  82,  47]))

▶ Processing S3
✓ Saved aligned_windows_S3_w60.npz
  Windows: (286, 60)
  Labels: (array([1, 2, 3]), array([152,  85,  49]))

▶ Processing S4
✓ Saved aligned_windows_S4_w60.npz
  Windows: (287, 60)
  Labels: (array([1, 2, 3]), array([154,  84,  49]))

▶ Processing S5
✓ Saved aligned_windows_S5_w60.npz
  Windows: (294, 60)
  Labels: (array([1, 2, 3]), array([159,  85,  50]))

▶ Processing S6
✓ Saved aligned_windows_S6_w60.npz
  Windows: (292, 60)
  Labels: (array([1, 2, 3]), array([157,  87,  48]))

▶ Processing S7
✓ Saved aligned_windows_S7_w60.npz
  Windows: (292, 60)
  Labels: (array([1, 2, 3]), array([158,  85,  49]))

▶ Processing S8
✓ Saved aligned_windows_S8_w60.npz
  Windows: (293, 60)
  Labels: (array([1, 2, 3]), array([155,  88,  50]))

▶ Processing S9
✓ Saved aligned_windows_S9_w60.npz
  Windows: (291, 60)
  Labels: (array([1, 2, 3]), array([157,  86,