# Build Full Feature Table

In [1]:
import eegproc as eeg
import pandas as pd
from ML import ICA
from ML import utils
import warnings

# Notebook cell: build one feature row-set per (patient, video) recording of
# the DREAMER CSV and stack them into `features_table`, which is also written
# to datasets/features_table.csv.
warnings.filterwarnings("ignore")

# Passed as `fs` to the ICA step and as the second positional argument of
# every eegproc call below; presumably the sampling rate in Hz.
FS = 128
csv_path = "datasets/DREAMER.csv"
dreamer_df = pd.read_csv(csv_path)
print(dreamer_df.columns)

# Band edges handed to the PSD and entropy feature extractors.
# Loop-invariant, so defined once instead of once per recording.
freqs = {
    "delta": (0.5, 4.0),
    "theta": (4.0, 8.0),
    "alpha": (8.0, 13.0),
    "beta": (13.0, 30.0),
    "gamma": (30.0, 45.0),
}

# Channel labels given to the ICA cleaner; they match the signal columns
# printed by `print(dreamer_df.columns)` above.
ch_names = [
    "AF3",
    "F7",
    "F3",
    "FC5",
    "T7",
    "P7",
    "O1",
    "O2",
    "P8",
    "T8",
    "FC6",
    "F4",
    "F8",
    "AF4",
]

# Kept defined up front so the name exists even if the loop body never runs.
features_table: pd.DataFrame = pd.DataFrame()

# Per-recording frames are collected here and concatenated once after the
# loop; concatenating onto the growing table inside the loop would re-copy
# every previously accumulated row on each iteration.
batches = []

# Hoisted: the set of video ids does not change between patients.
video_ids = dreamer_df["video_index"].unique()

for patient_id in dreamer_df["patient_index"].unique():
    for video_id in video_ids:
        mask = (dreamer_df["patient_index"] == patient_id) & (
            dreamer_df["video_index"] == video_id
        )
        eeg_df = dreamer_df.loc[mask, :].copy()

        # Not every (patient, video) combination has to exist in the CSV.
        if eeg_df.empty:
            continue

        # Labels are read from the first row of the recording only.
        arousal = eeg_df["arousal"].iloc[0]
        valence = eeg_df["valence"].iloc[0]

        # Leave only the signal columns for the processing steps below.
        eeg_df = eeg_df.drop(
            columns=["patient_index", "video_index", "arousal", "valence"],
            errors="ignore",
        )

        eeg_df, rep = ICA.ica_clean_eeg_df(
            eeg_df, fs=FS, ch_names=ch_names, method="fastica"
        )

        # NOTE(review): the filter receives eeg.FREQUENCY_BANDS while the
        # feature extractors below receive the local `freqs` -- confirm the
        # two band definitions agree.
        clean = eeg.bandpass_filter(
            eeg_df, FS, bands=eeg.FREQUENCY_BANDS, low=0.5, high=45.0, notch_hz=50
        )
        psd = eeg.psd_bandpowers(clean, FS, bands=freqs)
        shannons = eeg.shannons_entropy(clean, FS, bands=freqs)
        # hjorth = eeg.hjorth_params(clean, FS)
        # wt = eeg.wavelet_band_energy(eeg_df, FS, bands=freqs)
        # wt_entropy = eeg.wavelet_entropy(wt, bands=freqs)
        da = utils.compute_asymmetry_from_psd(psd)

        # The label cells are sliced with [1:-1] before int(), i.e. the first
        # and last character are stripped (presumably a bracketed string such
        # as "[3]" -- confirm against the CSV). Parsed once per recording.
        arousal_value = int(arousal[1:-1])
        valence_value = int(valence[1:-1])

        # One metadata row per PSD row so the frames can sit side by side.
        # Assumes psd / shannons / da carry a default 0..n-1 index so that the
        # axis=1 concat lines up with `meta` -- TODO confirm.
        n = len(psd)
        meta = pd.DataFrame(
            {
                "patient_index": pd.Series([patient_id] * n),
                "video_index": pd.Series([video_id] * n),
                "arousal": pd.Series([arousal_value] * n),
                "valence": pd.Series([valence_value] * n),
            }
        )
        # batch = pd.concat([meta, psd, hjorth, shannons, wt, wt_entropy], axis=1)
        batch = pd.concat([meta, psd, shannons, da], axis=1)

        batches.append(batch)

# Single concatenation; with no batches the table stays the empty frame
# created above (pd.concat raises on an empty list).
if batches:
    features_table = pd.concat(batches, ignore_index=True)


features_table.to_csv("datasets/features_table.csv")


print(features_table.head())

Index(['patient_index', 'video_index', 'arousal', 'valence', 'AF3', 'F7', 'F3',
       'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'],
      dtype='object')
   patient_index  video_index  arousal  valence     AF3_delta    AF3_theta  \
0              0            0        3        4  14642.681979   841.930712   
1              0            0        3        4  17977.863680   812.550288   
2              0            0        3        4   2002.797100  1130.954619   
3              0            0        3        4    830.438210  1083.215726   
4              0            0        3        4    502.453891   408.521500   

    AF3_alpha  AF3_gamma      F7_delta    F7_theta  ...  P8_P7_delta_da  \
0   52.574662  36.574809  12568.490417  724.843903  ...       -1.882246   
1   65.313533  55.945507  15333.702353  690.274017  ...       -2.186180   
2  113.018234  26.080742   1713.376630  972.588864  ...       -1.997937   
3   90.503110  14.460857    693.686336  919.656585  

## Add Empirical Mode Decomposition features

In [6]:
from pathlib import Path


# Notebook cell: compute IMF energy / entropy features for every
# (patient, video) recording and append them, keyed by patient_index and
# video_index, to datasets/features_table_imf.csv.
csv_path = Path("datasets/features_table_imf.csv")
# Truncate (or create) the output so a re-run does not append to old rows.
csv_path.write_text("")

# Columns copied from the already-built features_table into each batch.
required_cols = ["patient_index", "video_index"]

# Hoisted: the set of video ids does not change between patients.
video_ids = dreamer_df["video_index"].unique()

for patient_id in dreamer_df["patient_index"].unique():
    for video_id in video_ids:
        mask = (
            (dreamer_df["patient_index"] == patient_id)
            & (dreamer_df["video_index"] == video_id)
        )
        eeg_df = dreamer_df.loc[mask, :].copy()

        # Not every (patient, video) combination has to exist in the CSV.
        if eeg_df.empty:
            continue

        arousal = eeg_df["arousal"].iloc[0]
        valence = eeg_df["valence"].iloc[0]

        eeg_df = eeg_df.drop(
            columns=["patient_index", "video_index", "arousal", "valence"],
            errors="ignore",
        )

        # Indices are reset so the axis=1 concat below aligns by position.
        imf_energy = eeg.imf_band_energy(eeg_df, FS).reset_index(drop=True)
        imf_entropy = eeg.imf_entropy(imf_energy).reset_index(drop=True)

        # Rows of the previously built feature table for the same recording.
        ft_slice = features_table.loc[
            (features_table["patient_index"] == patient_id)
            & (features_table["video_index"] == video_id)
        ].reset_index(drop=True)

        # All three frames must have the same number of rows, otherwise the
        # side-by-side concat would pad with NaN. The id comparisons are a
        # defensive re-check of the filter that produced ft_slice.
        len_check = len(ft_slice) == len(imf_energy) == len(imf_entropy)
        values_check = (
            len_check
            and ft_slice["patient_index"].eq(patient_id).all()
            and ft_slice["video_index"].eq(video_id).all()
        )

        if not values_check:
            # Report every length that takes part in the check, so a mismatch
            # caused by imf_entropy alone is visible in the message.
            raise ValueError(
                f"Mismatch for patient {patient_id}, video {video_id}: "
                f"ft_slice_len={len(ft_slice)}, imf_energy_len={len(imf_energy)}, "
                f"imf_entropy_len={len(imf_entropy)}, "
                f"arousal/valence consistent={ft_slice[['arousal','valence']].nunique().le(1).all()}"
            )

        batch = pd.concat([ft_slice[required_cols], imf_energy, imf_entropy], axis=1)

        # Emit the header only while the file is still empty (first batch).
        write_header = (not csv_path.exists()) or (csv_path.stat().st_size == 0)
        batch.to_csv(csv_path, mode="a", index=False, header=write_header)