# Build Full Feature Table

In [1]:
import eegproc as eeg
import pandas as pd
# from ML import ICA
import ML.utils as utils
import warnings

# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")

FS = 128  # sampling rate handed to every eegproc call below (presumably Hz)
# MAKE SURE the full DREAMER.csv is there! Else download DREAMER.mat from Kaggle and run `start_dreamer.ipynb`
csv_path = "datasets/DREAMER.csv"
dreamer_df = pd.read_csv(csv_path)
print(dreamer_df.columns)

# Band name -> (low, high) edges, shared by all band-based extractors.
# Defined once here because it does not change between trials.
freqs = {
    "delta": (0.5, 4.0),
    "theta": (4.0, 8.0),
    "alpha": (8.0, 13.0),
    "betaL": (13.0, 20.0),
    "betaH": (20.0, 30.0),
    "gamma": (30.0, 45.0),
}
# Channel labels; not referenced in this cell, kept so it stays available
# to the rest of the notebook.
ch_names = [
    "AF3",
    "F7",
    "F3",
    "FC5",
    "T7",
    "P7",
    "O1",
    "O2",
    "P8",
    "T8",
    "FC6",
    "F4",
    "F8",
    "AF4",
]

# Columns that are not EEG signals. The CSV spells the labels "Arousal" and
# "Valence" (capitalised); the lower-case spellings are listed as well so the
# drop still works if the file is ever re-cased. Listing only the lower-case
# names, combined with errors="ignore", silently dropped neither label column.
non_eeg_columns = [
    "patient_index",
    "video_index",
    "Arousal",
    "Valence",
    "arousal",
    "valence",
]

# Number of feature rows kept per trial (the middle ones); the original note
# describes this as the middle 64s of the video.
n_keep = 16

# One feature frame per (patient, video) trial; concatenated once after the
# loop instead of growing a DataFrame inside it, which re-copies every
# previously collected row on each iteration.
batches = []

for patient_id in dreamer_df["patient_index"].unique():
    for video_id in dreamer_df["video_index"].unique():
        mask = (dreamer_df["patient_index"] == patient_id) & (
            dreamer_df["video_index"] == video_id
        )
        eeg_df = dreamer_df.loc[mask, :].copy()

        # Not every patient/video combination has to exist in the table.
        if eeg_df.empty:
            continue

        # The label is read from the first row of the trial.
        arousal = eeg_df["Arousal"].iloc[0]
        valence = eeg_df["Valence"].iloc[0]

        eeg_df = eeg_df.drop(columns=non_eeg_columns, errors="ignore")

        clean = eeg.bandpass_filter(
            eeg_df, FS, bands=freqs, low=0.5, high=45.0, notch_hz=50
        )
        psd = eeg.psd_bandpowers(clean, FS, bands=freqs, overlap=0.5)
        shannons = eeg.shannons_entropy(psd, bands=freqs)
        da = utils.compute_asymmetry_from_psd(psd)
        hjorth = eeg.hjorth_params(clean, FS)
        # NOTE(review): wavelet energy is computed on the unfiltered frame,
        # unlike the PSD/Hjorth features -- confirm this is intended.
        wt = eeg.wavelet_band_energy(eeg_df, FS, bands=freqs)
        wt_entropy = eeg.wavelet_entropy(wt, bands=freqs)

        n = len(psd)

        if n >= n_keep:
            # Keep the centred run of n_keep rows in every feature frame.
            # Assumes all feature frames have the same length as psd.
            start = (n - n_keep) // 2
            end = start + n_keep
            psd, shannons, hjorth, wt, wt_entropy, da = (
                frame.iloc[start:end].reset_index(drop=True)
                for frame in (psd, shannons, hjorth, wt, wt_entropy, da)
            )

        n = len(psd)  # row count after the optional trim

        # Labels are parsed by stripping the first and last character and
        # converting the rest to int (presumably a bracketed value such as
        # "[3]" -- verify against the CSV).
        meta = pd.DataFrame(
            {
                "patient_index": patient_id,
                "video_index": video_id,
                "arousal": int(arousal[1:-1]),
                "valence": int(valence[1:-1]),
            },
            index=range(n),
        )
        batches.append(
            pd.concat([meta, psd, shannons, hjorth, wt, wt_entropy, da], axis=1)
        )

features_table: pd.DataFrame = (
    pd.concat(batches, ignore_index=True) if batches else pd.DataFrame()
)

features_table.to_csv("datasets/features_table.csv")


print(features_table.head())

Index(['patient_index', 'video_index', 'Valence', 'Arousal', 'AF3', 'F7', 'F3',
       'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'],
      dtype='object')
   patient_index  video_index  arousal  valence    AF3_delta   AF3_theta  \
0              0            0        3        4   516.772213  628.626194   
1              0            0        3        4  2423.037796  344.734873   
2              0            0        3        4  1074.801281  612.609187   
3              0            0        3        4  1517.929316  475.509765   
4              0            0        3        4   791.403457  483.249495   

   AF3_alpha  AF3_betaL   AF3_betaH  AF3_gamma  ...  P8_P7_gamma_da  \
0  54.718997  22.961835  666.319728   9.691387  ...       -0.906225   
1  22.161342  15.796736  637.181066  10.202771  ...       -0.761882   
2  26.159301  12.911582  662.645955   8.281819  ...       -0.614269   
3  55.107967  19.214215  658.764854   8.936880  ...       -0.565676   
4  26.059

## Add Empirical Mode Decomposition features

In [6]:
from pathlib import Path


csv_path = Path("datasets/features_table_imf.csv")
# Create/truncate the output so a rerun does not append to stale rows.
csv_path.write_text("")

# Columns that are not EEG signals. The label columns are listed in both
# spellings because the raw table uses "Arousal"/"Valence"; with
# errors="ignore" a lower-case-only list would silently leave them in.
non_eeg_columns = [
    "patient_index",
    "video_index",
    "Arousal",
    "Valence",
    "arousal",
    "valence",
]

# Key columns copied from the feature table into every output row.
required_cols = ["patient_index", "video_index"]

# The header is written only with the first batch appended to the file.
write_header = True

for patient_id in dreamer_df["patient_index"].unique():
    for video_id in dreamer_df["video_index"].unique():
        mask = (
            (dreamer_df["patient_index"] == patient_id)
            & (dreamer_df["video_index"] == video_id)
        )
        eeg_df = dreamer_df.loc[mask, :].copy()

        # Not every patient/video combination has to exist in the table.
        if eeg_df.empty:
            continue

        eeg_df = eeg_df.drop(columns=non_eeg_columns, errors="ignore")

        imf_energy = eeg.imf_band_energy(eeg_df, FS).reset_index(drop=True)
        imf_entropy = eeg.imf_entropy(imf_energy).reset_index(drop=True)

        # Rows of the previously built feature table for the same trial.
        ft_slice = features_table.loc[
            (features_table["patient_index"] == patient_id)
            & (features_table["video_index"] == video_id)
        ].reset_index(drop=True)

        # The three frames are joined row by row, so they must have the same
        # number of rows and the slice must belong to this trial.
        # NOTE(review): the feature table may have been trimmed to a fixed
        # number of rows per trial while the IMF frames are not trimmed
        # here -- confirm the lengths are expected to agree.
        len_check = (len(ft_slice) == len(imf_energy) == len(imf_entropy))
        values_check = (
            len_check
            and ft_slice["patient_index"].eq(patient_id).all()
            and ft_slice["video_index"].eq(video_id).all()
        )

        if not values_check:
            raise ValueError(
                f"Mismatch for patient {patient_id}, video {video_id}: "
                f"ft_slice_len={len(ft_slice)}, imf_energy_len={len(imf_energy)}, "
                f"arousal/valence consistent={ft_slice[['arousal','valence']].nunique().le(1).all()}"
            )

        batch = pd.concat([ft_slice[required_cols], imf_energy, imf_entropy], axis=1)

        # Append per trial so partial results survive an interrupted run.
        batch.to_csv(csv_path, mode="a", index=False, header=write_header)
        write_header = False