In [1]:
import os
import pandas as pd
import numpy as np
import time
from scipy.signal import welch
from scipy.integrate import trapezoid 

# Frequency limits (low, high) in Hz for each EEG band of interest.
band_ranges = dict(
    delta=(0.5, 4),
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
)

# Acquisition and sliding-window settings.
fs = 128             # sampling rate (Hz)
window_duration = 5  # length of one analysis window, in seconds
stride_seconds = 2   # seconds between the starts of consecutive windows

# The same window settings expressed as sample counts.
stride_size = int(fs * stride_seconds)
window_size = int(fs * window_duration)

def rms(x):
    """Return the root-mean-square of the samples in ``x``.

    The input is converted to a float64 array before squaring. Squaring in
    the input's own dtype would silently wrap around for narrow integer
    types (e.g. int8/int16 samples) and give a wrong result.

    Parameters
    ----------
    x : array-like
        Real-valued samples.

    Returns
    -------
    numpy.float64
        ``sqrt(mean(x ** 2))``. Empty input yields ``nan`` (NumPy emits a
        RuntimeWarning for the mean of an empty array).
    """
    samples = np.asarray(x, dtype=np.float64)
    return np.sqrt(np.mean(np.square(samples)))

def compute_bandpower_per_channel(window_df, sampling_rate=None, bands=None, nperseg=256):
    """Compute the power in each frequency band for every channel of a window.

    For each channel column, a Welch power spectral density estimate is
    computed and integrated (trapezoidal rule) over each band's frequency
    range, with both band edges included.

    Parameters
    ----------
    window_df : pandas.DataFrame
        One window of samples. Every column except the last two is treated
        as a signal channel; the last two columns are skipped (the calling
        script stores the valence/arousal labels there).
    sampling_rate : float, optional
        Sampling rate in Hz. Defaults to the module-level ``fs``.
    bands : dict, optional
        Mapping ``band_name -> (low_hz, high_hz)``. Defaults to the
        module-level ``band_ranges``.
    nperseg : int, optional
        Segment length passed to ``scipy.signal.welch`` (default 256).

    Returns
    -------
    dict
        Maps ``'<channel>_<band_name>'`` to the integrated band power.
    """
    # Fall back to the module-level configuration when not given explicitly,
    # so existing single-argument calls behave exactly as before.
    if sampling_rate is None:
        sampling_rate = fs
    if bands is None:
        bands = band_ranges

    band_features = {}
    for ch in window_df.columns[:-2]:
        f, Pxx = welch(window_df[ch], fs=sampling_rate, nperseg=nperseg)
        for band_name, (low, high) in bands.items():
            # Build the band mask once and reuse it for both arrays.
            in_band = (f >= low) & (f <= high)
            band_features[f'{ch}_{band_name}'] = trapezoid(Pxx[in_band], f[in_band])
    return band_features

# Final features table: one dict per window, across all subjects and trials.
feature_rows = []

base_path = 'DREAMER_CSV'

start_all = time.time()

# Expected layout: <base_path>/<subject>/<trial>.csv
for subject in sorted(os.listdir(base_path)):
    subject_path = os.path.join(base_path, subject)
    if not os.path.isdir(subject_path):
        continue

    for trial_file in sorted(os.listdir(subject_path)):
        if not trial_file.endswith('.csv'):
            continue

        trial_path = os.path.join(subject_path, trial_file)
        print(f" Processing {subject}/{trial_file} ...")
        start_trial = time.time()

        df = pd.read_csv(trial_path)
        if df.empty:
            # A header-only file has no label row to read; skip it instead
            # of failing on .iloc[0] and aborting the whole run.
            print(f" Skipping {subject}/{trial_file}: no samples")
            continue

        # The labels are read once per trial from its first row.
        valence = df['valence'].iloc[0]
        arousal = df['arousal'].iloc[0]

        # Strip only the trailing extension; str.replace would also remove
        # any '.csv' occurring elsewhere in the file name.
        trial_name = trial_file[:-len('.csv')]

        # Every column except the last two is treated as an EEG channel
        # (same convention as compute_bandpower_per_channel).
        eeg_columns = df.columns[:-2]

        # Slide a fixed-size window over the trial; a trailing remainder
        # shorter than window_size is dropped.
        window_starts = range(0, len(df) - window_size + 1, stride_size)
        for window_num, start_idx in enumerate(window_starts, start=1):
            end_idx = start_idx + window_size
            window_df = df.iloc[start_idx:end_idx]

            # Time-domain statistics per channel.
            temporal_feats = {}
            for ch in eeg_columns:
                x = window_df[ch].values
                temporal_feats[f'{ch}_mean'] = np.mean(x)
                temporal_feats[f'{ch}_std'] = np.std(x)
                temporal_feats[f'{ch}_rms'] = rms(x)

            # Frequency-domain band powers per channel.
            band_feats = compute_bandpower_per_channel(window_df)

            # Identification and label columns first, then the features.
            features = {
                'subject': subject,
                'trial': trial_name,
                'window': window_num,
                'valence': valence,
                'arousal': arousal
            }
            features.update(temporal_feats)
            features.update(band_feats)
            feature_rows.append(features)

            # Partial save every 1000 windows (rewrites the whole file with
            # everything collected so far).
            if len(feature_rows) % 1000 == 0:
                pd.DataFrame(feature_rows).to_csv('partial_progress.csv', index=False)
                print(f" Saved partial progress ({len(feature_rows)} rows)")

        print(f" Done {subject}/{trial_file} in {time.time() - start_trial:.1f}s")

# Final saving
features_df = pd.DataFrame(feature_rows)
features_df.to_csv('dreamer_feature_5s_window_2s_stride.csv', index=False)

print(" Feature extraction complete.")
print(f" Saved as: {os.path.abspath('dreamer_feature_5s_window_2s_stride.csv')}")
print(f" Total runtime: {time.time() - start_all:.1f} seconds")


 Processing subject_01/trial_01.csv ...
✅ Done subject_01/trial_01.csv in 2.3s
 Processing subject_01/trial_02.csv ...
✅ Done subject_01/trial_02.csv in 1.1s
 Processing subject_01/trial_03.csv ...
✅ Done subject_01/trial_03.csv in 2.4s
 Processing subject_01/trial_04.csv ...
✅ Done subject_01/trial_04.csv in 1.0s
 Processing subject_01/trial_05.csv ...
✅ Done subject_01/trial_05.csv in 0.7s
 Processing subject_01/trial_06.csv ...
✅ Done subject_01/trial_06.csv in 1.2s
 Processing subject_01/trial_07.csv ...
✅ Done subject_01/trial_07.csv in 1.5s
 Processing subject_01/trial_08.csv ...
✅ Done subject_01/trial_08.csv in 2.0s
 Processing subject_01/trial_09.csv ...
✅ Done subject_01/trial_09.csv in 0.7s
 Processing subject_01/trial_10.csv ...
✅ Done subject_01/trial_10.csv in 0.4s
 Processing subject_01/trial_11.csv ...
 Saved partial progress (1000 rows)
✅ Done subject_01/trial_11.csv in 0.8s
 Processing subject_01/trial_12.csv ...
✅ Done subject_01/trial_12.csv in 1.0s
 Processing subj