# Features 
| Column Name       | Description                                                                 | Type      | Mathematical Basis / Formula |
|--------------------|------------------------------------------------------------------------------|------------|--------------------------------|
| **Subject**        | Participant identifier.                                                     | int        | — |
| **SedationLabel**  | Sedation level label: Baseline, Mild, Moderate, Recovery.                   | string     | — |
| **Band**           | EEG frequency band (delta, theta, alpha, beta, gamma).                      | string     | Defined by frequency bounds $(f_{min}, f_{max})$ |
| **mean_degree**    | Average weighted node degree — mean sum of connection weights per node.     | float      | $k_i = \sum_j w_{ij}$, $\langle k \rangle = \frac{1}{N} \sum_i k_i$ |
| **clustering**     | Weighted clustering coefficient — measures local interconnectedness.         | float      | $C = \frac{1}{N} \sum_i \frac{\sum_{j,k} (W^{1/3})_{ij}(W^{1/3})_{jk}(W^{1/3})_{ki}}{k_i(k_i - 1)}$ |
| **path_length**    | Characteristic path length — mean shortest path between all node pairs.     | float      | $L = \frac{1}{N(N-1)} \sum_{i \ne j} d_{ij}$, where $d_{ij}$ is the shortest-path distance with edge lengths $1 / w_{ij}$ |
| **small_worldness**| Small-worldness coefficient — ratio of normalized clustering to normalized path length. | float      | $\sigma = \frac{C / C_{rand}}{L / L_{rand}}$ |
| **Propofol_ugL**   | Propofol plasma concentration at recording time (µg/L).                     | float      | — |
| **RT_ms**          | Mean reaction time (ms) in behavioral task.                                 | float      | — |
| **Correct**        | Number of correct responses (max = 40).                                     | int        | — |
| **Responsiveness**      | Behavioral responsiveness classification: 1 = responsive, 0 = drowsy/unresponsive. Based on overlap of binomial confidence intervals between baseline and moderate sedation hit-rate distributions.         | int        | $R = 1 \text{ if } CI_{\text{mod}} \cap CI_{\text{base}} \neq \emptyset,\ 0 \text{ otherwise}$ |

In [6]:
import numpy as np
import pandas as pd
import mne
from mne_connectivity import spectral_connectivity_epochs
from pathlib import Path
from statsmodels.stats.proportion import proportion_confint
from tqdm import tqdm
import warnings
import sys

sys.path.append('../src')
from connectivity import compute_dwpli
from graph_metrics import compute_graph_metrics

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Band name -> (fmin, fmax) bounds handed to compute_dwpli for each band
# (presumably in Hz — confirm against the connectivity module).
FREQ_BANDS = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
    gamma=(30, 45),
)

# Both locations assume the working directory is <project>/notebooks.
PROJECT_ROOT = Path.cwd().parent
MANIFEST_PATH = Path("../data/data_derivatives/manifests/manifest.csv")

manifest = pd.read_csv(MANIFEST_PATH)

# The manifest stores SetPath relative to the project root; turn each entry
# into an absolute, resolved Path.
manifest["SetPath"] = manifest["SetPath"].apply(
    lambda rel_path: (PROJECT_ROOT / rel_path).resolve()
)

# Small two-subject slice for quick test runs.
subset_manifest = manifest.loc[manifest["Subject"].isin([1, 2])]


In [None]:
# Build one feature record per (recording, frequency band) and write them all
# to features.csv. Recordings or bands that fail are reported and skipped so
# that a single bad file does not abort the whole batch.
records = []

# Integer code for each sedation label, stored in the "SedationLevel" column.
# Labels missing from this map yield None.
sedation_map = {
    "Baseline": 1,
    "Mild": 2,
    "Moderate": 3,
    "Recovery": 4,
}

# NOTE(review): this iterates the full manifest; `subset_manifest` is available
# for a quicker test run — confirm which one is intended.
for _, row in manifest.iterrows():
    set_path = Path(row["SetPath"])

    if not set_path.exists():
        print(f"File not found: {set_path}")
        continue

    # Load EEG epochs
    try:
        epochs = mne.io.read_epochs_eeglab(set_path, verbose="error")
    except Exception as e:
        print(f"Could not load {set_path.name}: {e}")
        continue

    # Resolve the display name once, falling back to the file name, so that a
    # manifest without a "BaseName" column cannot raise from inside the
    # per-band error handler below.
    base_name = row.get("BaseName", set_path.name)

    # Loop over frequency bands
    for band, (fmin, fmax) in FREQ_BANDS.items():
        try:
            con_matrix = compute_dwpli(epochs, fmin, fmax)

            # Check for an empty matrix first, then for an all-NaN one.
            if con_matrix.shape[0] == 0 or np.isnan(con_matrix).all():
                raise ValueError(f"Empty or NaN matrix in {band}")

            # n_rand is presumably the number of random reference graphs used
            # for normalization — confirm in graph_metrics.
            metrics = compute_graph_metrics(con_matrix, n_rand=10)

            record = {
                "Subject": row["Subject"],
                "SedationLabel": row["SedationLabel"],
                "SedationLevel": sedation_map.get(row["SedationLabel"]),
                "Band": band,
                **metrics,
                "Propofol_ugL": row["Propofol_ugL"],
                "RT_ms": row["RT_ms"],
                "Correct": row["Correct"],
            }
            records.append(record)

        except Exception as e:
            # Deliberately broad: report the failure and move on to the next band.
            print(f"❌ Error in {base_name} ({band}): {type(e).__name__}: {e}")

df_metrics = pd.DataFrame(records)
df_metrics.to_csv("../data/data_derivatives/features.csv", index=False)

In [13]:
from normalization import normalize_within_subject

# Graph-metric columns handed to normalize_within_subject.
# NOTE(review): assumes compute_graph_metrics emits every one of these keys —
# confirm against graph_metrics.
feature_cols = [
    "mean_degree",
    "clustering",
    "path_length",
    "global_efficiency",
    "local_efficiency",
    "modularity",
    "participation_coefficient",
    "small_worldness",
]

# Normalize the listed features within each subject and save the result
# next to the raw feature table.
df_delta = normalize_within_subject(df_metrics, feature_cols)
df_delta.to_csv(
    "../data/data_derivatives/features_within_subject.csv",
    index=False,
)