# Features 
| Column Name       | Description                                                                 | Type      | Mathematical Basis / Formula |
|--------------------|------------------------------------------------------------------------------|------------|--------------------------------|
| **Subject**        | Participant identifier.                                                     | int        | — |
| **SedationLabel**  | Sedation level label: Baseline, Mild, Moderate, Recovery.                   | string     | — |
| **Band**           | EEG frequency band (Delta, Theta, Alpha, Beta, Gamma).                      | string     | Defined by frequency bounds $(f_{min}, f_{max})$ |
| **mean_degree**    | Average weighted node degree — mean sum of connection weights per node.     | float      | $k_i = \sum_j w_{ij}$, $\langle k \rangle = \frac{1}{N} \sum_i k_i$ |
| **clustering**     | Weighted clustering coefficient — measures local interconnectedness.         | float      | $C = \frac{1}{N} \sum_i \frac{\sum_{j,k} (W^{1/3})_{ij}(W^{1/3})_{jk}(W^{1/3})_{ki}}{k_i(k_i - 1)}$ |
| **path_length**    | Characteristic path length — mean shortest path between all node pairs.     | float      | $L = \frac{1}{N(N-1)} \sum_{i \ne j} d_{ij}$, where $d_{ij}$ is the shortest-path distance computed with edge lengths $1 / w_{ij}$ |
| **small_worldness**| Small-worldness coefficient — ratio of normalized clustering to normalized path length. | float      | $\sigma = \frac{C / C_{rand}}{L / L_{rand}}$ |
| **Propofol_ugL**   | Propofol plasma concentration at recording time (µg/L).                     | float      | — |
| **RT_ms**          | Mean reaction time (ms) in behavioral task.                                 | float      | — |
| **Correct**        | Number of correct responses (max = 40).                                     | int        | — |
| **Responsiveness**      | Behavioral responsiveness classification: 1 = responsive, 0 = drowsy/unresponsive. Based on overlap of binomial confidence intervals between baseline and moderate sedation hit-rate distributions.         | int        | $R = 1 \text{ if } CI_{\text{mod}} \cap CI_{\text{base}} \neq \emptyset,\ 0 \text{ otherwise}$ |

In [1]:
# --- Project path setup ---
import sys
from pathlib import Path

# Project root: one level above this file when run as a script (where
# `__file__` is defined), otherwise one level above the current working
# directory (the notebook case, where `__file__` is not defined).
ROOT = Path(__file__).resolve().parents[1] if "__file__" in globals() else Path.cwd().parent

# Make <ROOT>/src importable. The membership check keeps the cell idempotent:
# re-running it must not pile up duplicate entries on sys.path.
_SRC_DIR = str(ROOT / "src")
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

In [2]:
import numpy as np
import pandas as pd
import mne
from pathlib import Path
import importlib
from transformation import compute_band_ratios
from normalization import normalize_within_subject
from connectivity import compute_dwpli, compute_dwpli_per_epoch
from graph_metrics import compute_graph_metrics
import os

# Frequency bands analysed: band name -> (lower, upper) bound, handed to the
# connectivity functions as fmin/fmax (presumably in Hz — confirm).
FREQ_BANDS = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
    gamma=(30, 45),
)

# Recording manifest; this path is relative to the current working directory.
MANIFEST_PATH = Path("../data/data_derivatives/manifests/manifest.csv")
manifest = pd.read_csv(MANIFEST_PATH)

# --- Resolve paths relative to the project root ---
# (Assumes notebook is inside /notebooks)
PROJECT_ROOT = Path.cwd().parent
manifest["SetPath"] = manifest["SetPath"].map(
    lambda rel_path: (PROJECT_ROOT / rel_path).resolve()
)

# --- Subset for testing ---
# Only the recordings of subjects 1 and 2.
subset_manifest = manifest.loc[manifest["Subject"].isin([1, 2])]


In [None]:
# Version for epochs averaged
#
# For every recording listed in `manifest` and every band in FREQ_BANDS:
# compute one connectivity matrix with compute_dwpli, derive graph metrics
# from it, and collect one record per (recording, band). Recordings that are
# missing or cannot be loaded, and bands whose computation raises, are
# reported on stdout and skipped so a single bad file does not abort the run.
# The collected records are written to features.csv.
records = []

# Ordinal code for each sedation label; labels not listed here map to None.
sedation_map = {
    "Baseline": 1,
    "Mild": 2,
    "Moderate": 3,
    "Recovery": 4,
}

# Walk the manifest row by row (iterrows instead of positional indexing).
for _, row in manifest.iterrows():
    set_path = Path(row["SetPath"])

    if not set_path.exists():
        print(f"File not found: {set_path}")
        continue

    # Load EEG epochs
    try:
        epochs = mne.io.read_epochs_eeglab(set_path, verbose="error")
    except Exception as e:
        # Best-effort batch run: report the unreadable file and move on.
        print(f"Could not load {set_path.name}: {e}")
        continue

    # Loop over frequency bands
    for band, (fmin, fmax) in FREQ_BANDS.items():
        try:
            con_matrix = compute_dwpli(epochs, fmin, fmax)

            # Reject matrices that carry no usable connectivity values.
            if np.isnan(con_matrix).all() or con_matrix.shape[0] == 0:
                raise ValueError(f"Empty or NaN matrix in {band}")

            metrics = compute_graph_metrics(con_matrix, n_rand=10)

            # Key order defines the column order of the output CSV.
            records.append(
                {
                    "Subject": row["Subject"],
                    "SedationLabel": row["SedationLabel"],
                    "SedationLevel": sedation_map.get(row["SedationLabel"]),
                    "Band": band,
                    **metrics,
                    "Propofol_ugL": row["Propofol_ugL"],
                    "RT_ms": row["RT_ms"],
                    "Correct": row["Correct"],
                }
            )

        except Exception as e:
            # Report the failing band and continue with the next one.
            print(f"Error in {row['BaseName']} ({band}): {type(e).__name__}: {e}")

df_metrics = pd.DataFrame(records)
df_metrics.to_csv("../data/data_derivatives/features.csv", index=False)

In [12]:
# Version for per-epoch metrics (not averaged)
#
# For every non-Recovery recording in `subset_manifest` and every band in
# FREQ_BANDS: compute one connectivity matrix per epoch with
# compute_dwpli_per_epoch, derive graph metrics for each matrix, and collect
# one record per (recording, band, epoch). Failures are reported on stdout and
# skipped. The collected records are written to features_per_epoch.csv.

records = []

# Ordinal code for each sedation label; Recovery is excluded from this run.
sedation_map = {
    "Baseline": 1,
    "Mild": 2,
    "Moderate": 3,
}

# Iterate the test subset directly. Rebinding the global `manifest` to the
# subset would silently shrink the input of any cell that reads `manifest`
# and is (re-)run after this one.
for _, row in subset_manifest.iterrows():
    set_path = Path(row["SetPath"])
    label = row["SedationLabel"]

    if label == "Recovery":
        continue

    if not set_path.exists():
        print(f"File not found: {set_path}")
        continue

    # Load EEG epochs
    try:
        epochs = mne.io.read_epochs_eeglab(set_path, verbose="error")
    except Exception as e:
        # Best-effort batch run: report the unreadable file and move on.
        print(f"Could not load {set_path.name}: {e}")
        continue

    # Loop over frequency bands
    for band, (fmin, fmax) in FREQ_BANDS.items():
        try:
            # Compute one connectivity matrix per epoch
            con_matrices = compute_dwpli_per_epoch(epochs, fmin, fmax)

            for ep_idx, con_matrix in enumerate(con_matrices):
                # Handle failures per epoch: one unusable epoch must not
                # discard the remaining epochs of this band.
                try:
                    if np.isnan(con_matrix).all() or con_matrix.shape[0] == 0:
                        raise ValueError(f"Empty or NaN matrix in {band}, epoch {ep_idx}")

                    metrics = compute_graph_metrics(con_matrix, n_rand=10)

                    # Key order defines the column order of the output CSV.
                    record = {
                        "Subject": row["Subject"],
                        "Epoch": ep_idx,
                        "SedationLabel": label,
                        "SedationLevel": sedation_map.get(label),
                        "Band": band,
                        **metrics,
                        "Propofol_ugL": row["Propofol_ugL"],
                        "RT_ms": row["RT_ms"],
                        "Correct": row["Correct"],
                    }
                except Exception as e:
                    print(f"Error in {row['BaseName']} ({band}): {type(e).__name__}: {e}")
                    continue

                records.append(record)

        except Exception as e:
            # Connectivity computation (or iteration over its result) failed
            # for the whole band: report it and continue with the next band.
            print(f"Error in {row['BaseName']} ({band}): {type(e).__name__}: {e}")

# Combine and save
df_metrics = pd.DataFrame(records)
df_metrics.to_csv("../data/data_derivatives/features_per_epoch.csv", index=False)

  warn(
  warn(
  warn(


In [4]:
# Transformations
# Derive the band-ratio features from the graph metrics; the CSV destination
# is handed to compute_band_ratios through `output_path`.
df_ratios = compute_band_ratios(df_metrics, output_path="../data/data_derivatives/features_band_ratios.csv")


In [13]:
# Merge on subject + sedation level
# Left join: every metrics row is kept and receives the ratio columns that
# share its (Subject, SedationLevel) key.
merge_keys = ["Subject", "SedationLevel"]
df_combined = df_metrics.merge(df_ratios, how="left", on=merge_keys)

# Save for reproducibility
df_combined.to_csv(
    "../data/data_derivatives/features_ratio_metrics.csv",
    index=False,
)

# Last expression of the cell: preview the first rows.
df_combined.head()


Unnamed: 0,Subject,Epoch,SedationLabel,SedationLevel,Band,mean_degree,clustering,path_length,global_efficiency,local_efficiency,...,modularity_alpha_beta_ratio,modularity_delta_alpha_ratio,participation_coefficient_theta_alpha_ratio,participation_coefficient_theta_beta_ratio,participation_coefficient_alpha_beta_ratio,participation_coefficient_delta_alpha_ratio,small_worldness_theta_alpha_ratio,small_worldness_theta_beta_ratio,small_worldness_alpha_beta_ratio,small_worldness_delta_alpha_ratio
0,2,0,Baseline,1,delta,16.563253,0.244427,2.364679,0.614306,0.601338,...,0.87659,0.979715,1.131399,1.1153,0.985771,1.226716,0.916824,0.949149,1.035257,0.91896
1,2,1,Baseline,1,delta,16.418918,0.266921,2.483114,0.605881,0.604351,...,0.87659,0.979715,1.131399,1.1153,0.985771,1.226716,0.916824,0.949149,1.035257,0.91896
2,2,2,Baseline,1,delta,14.761273,0.220441,2.594248,0.619577,0.60122,...,0.87659,0.979715,1.131399,1.1153,0.985771,1.226716,0.916824,0.949149,1.035257,0.91896
3,2,3,Baseline,1,delta,15.051334,0.277515,2.557817,0.617542,0.677193,...,0.87659,0.979715,1.131399,1.1153,0.985771,1.226716,0.916824,0.949149,1.035257,0.91896
4,2,4,Baseline,1,delta,14.914247,0.284931,2.609908,0.611823,0.652024,...,0.87659,0.979715,1.131399,1.1153,0.985771,1.226716,0.916824,0.949149,1.035257,0.91896


In [14]:

# Apply normalize_within_subject to the combined feature table and persist
# the result without the DataFrame index.
df_delta = normalize_within_subject(df_combined)
df_delta.to_csv(
    "../data/data_derivatives/features_within_subject.csv",
    index=False,
)

In [15]:
# Quick sanity check: show the global_efficiency column after normalization.
print(df_delta.loc[:, "global_efficiency"])

78     0.611621
79     0.616530
80     0.598746
81     0.615181
82     0.616916
         ...   
470    0.615934
471    0.617643
472    0.612210
473    0.614001
474    0.619984
Name: global_efficiency, Length: 580, dtype: float64
