In [None]:
import pickle
import numpy as np
import pandas as pd
from scipy.signal import welch
import os


# Sampling frequency passed to the spectral estimator (Hz).
SAMPLING_RATE = 128

# Pickled trial table to read, and CSV feature table to write.
INPUT_FILE = "../data/processed/DREAMER_filtered.pkl"
OUTPUT_FILE = "../data/processed/eeg_features_concat.csv"

# DREAMER channel order (standard Emotiv EPOC); the list position of a name
# is used as the column index into each trial's signal array.
CHANNEL_NAMES = [
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
]

# Frequency bands as (low, high) edges in Hz, keyed by band name.
BANDS = {
    'Theta': (4, 8),
    'Alpha': (8, 13),
    'Beta': (13, 30),
}



def get_band_power(signal, fs, band, nperseg=256):
    """
    Computes average power in a specific frequency band using Welch's method.

    Args:
        signal: Samples with time along axis 0; either 1-D (Time,) or
            2-D (Time, Channels).
        fs: Sampling frequency in Hz.
        band: (low, high) band edges in Hz. Both edges are inclusive, so two
            adjacent bands share the bin that sits exactly on their boundary.
        nperseg: Welch segment length in samples (default 256). It is clamped
            to the signal length for short recordings.

    Returns:
        Mean of the PSD over the frequency bins inside the band: an array with
        one value per channel for 2-D input, or a scalar for 1-D input.
        The result is NaN when no frequency bin falls inside the band.
    """
    low, high = band
    signal = np.asarray(signal)

    # welch() would shrink an over-long segment itself, but with a UserWarning
    # on every call; clamping up front yields the same PSD silently.
    nperseg = min(nperseg, signal.shape[0])

    # Compute Power Spectral Density (PSD) -> to compute power in frequency bands
    f, Pxx = welch(signal, fs=fs, nperseg=nperseg, axis=0)

    # Find indices for the band
    idx_band = np.logical_and(f >= low, f <= high)

    # Mean power in that band.
    # Pxx is (Frequencies, Channels) or (Frequencies,); indexing only the
    # first axis keeps both layouts working.
    band_power = np.mean(Pxx[idx_band], axis=0)
    return band_power

def compute_rms(signal):
    """Computes Root Mean Square amplitude along axis 0.

    Args:
        signal: Array-like of samples; the reduction runs over axis 0, so a
            2-D input yields one RMS value per column.

    Returns:
        sqrt(mean(signal**2)) over axis 0. Floating-point input keeps its
        dtype; any other input is computed in float64.
    """
    samples = np.asarray(signal)
    if not np.issubdtype(samples.dtype, np.inexact):
        # Squaring happens in the input dtype: integer samples (e.g. int16)
        # would wrap around silently before the mean is taken.
        samples = samples.astype(np.float64)
    return np.sqrt(np.mean(samples**2, axis=0))


# Feature-extraction driver: loads the pickled trial table, derives per-channel
# RMS, band-power and frontal-alpha-asymmetry features for the baseline and
# stimulus recording of every trial, and writes one CSV row per trial.

print(f"Loading data from {INPUT_FILE}...")
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point INPUT_FILE at a file produced by a trusted pipeline.
with open(INPUT_FILE, "rb") as f:
    df = pickle.load(f)

n_trials = len(df)
print(f"Data loaded. Found {n_trials} trials.")
print("Starting feature extraction (Concatenation Mode)...")

# Left/right frontal channel index pairs into CHANNEL_NAMES used for FAA.
pairs = [(2, 11), (1, 12), (0, 13)]  # (F3, F4), (F7, F8), (AF3, AF4)

all_rows = []

# Loop through every trial in the dataset. A positional counter is used for
# progress reporting because the DataFrame index labels need not be 0..N-1.
for trial_num, (_, row) in enumerate(df.iterrows()):

    # 1. Get raw signals
    baseline = row['baseline']  # Shape: (Time, 14)
    stimuli = row['stimuli']    # Shape: (Time, 14)

    # Identifiers and labels are copied through unchanged.
    features = {
        'subject_id': row['subject_id'],
        'video_id':   row['video_id'],
        'valence':    row['valence'],
        'arousal':    row['arousal']
    }

    # 2. RMS amplitude per channel
    rms_b = compute_rms(baseline)
    rms_s = compute_rms(stimuli)

    for ch_idx, ch_name in enumerate(CHANNEL_NAMES):
        features[f"Base_RMS_{ch_name}"] = rms_b[ch_idx]
        features[f"Stim_RMS_{ch_name}"] = rms_s[ch_idx]

    # 3. Band power per channel; results are kept so the Alpha values can be
    # reused for FAA instead of running Welch a second time.
    band_powers = {}
    for band_name, band in BANDS.items():

        pow_b = get_band_power(baseline, SAMPLING_RATE, band)
        pow_s = get_band_power(stimuli, SAMPLING_RATE, band)
        band_powers[band_name] = (pow_b, pow_s)

        for ch_idx, ch_name in enumerate(CHANNEL_NAMES):
            features[f"Base_{band_name}_{ch_name}"] = pow_b[ch_idx]
            features[f"Stim_{band_name}_{ch_name}"] = pow_s[ch_idx]

    # 4. Frontal Alpha Asymmetry
    alpha_b, alpha_s = band_powers['Alpha']

    # Average the Left and Right Alpha powers
    left_b = np.mean([alpha_b[left] for left, _ in pairs])
    right_b = np.mean([alpha_b[right] for _, right in pairs])

    left_s = np.mean([alpha_s[left] for left, _ in pairs])
    right_s = np.mean([alpha_s[right] for _, right in pairs])

    # Calculate FAA using standard log formula: ln(Right) - ln(Left).
    # The 1e-10 offset keeps log() finite when a mean power is exactly zero.
    features['Base_FAA'] = np.log(right_b + 1e-10) - np.log(left_b + 1e-10)
    features['Stim_FAA'] = np.log(right_s + 1e-10) - np.log(left_s + 1e-10)

    all_rows.append(features)

    if trial_num % 100 == 0:
        print(f"Processed {trial_num}/{n_trials} trials...")


features_df = pd.DataFrame(all_rows)

# Any missing value (in features or in the copied label columns) becomes 0.
features_df = features_df.fillna(0)

print(f"Finished! Final shape: {features_df.shape}")
features_df.to_csv(OUTPUT_FILE, index=False)
print(f"Saved concatenated features to: {OUTPUT_FILE}")

Loading data from ../data/processed/DREAMER_filtered.pkl...
Data loaded. Found 414 trials.
Starting feature extraction (Concatenation Mode)...
Processed 0/414 trials...
Processed 100/414 trials...
Processed 200/414 trials...
Processed 300/414 trials...
Processed 400/414 trials...
Finished! Final shape: (414, 118)
Saved concatenated features to: ../data/processed/eeg_features_concat.csv
