# Features 
| Column Name       | Description                                                                 | Type      | Mathematical Basis / Formula |
|--------------------|------------------------------------------------------------------------------|------------|--------------------------------|
| **Subject**        | Participant identifier.                                                     | int        | — |
| **SedationLabel**  | Sedation level label: Baseline, Mild, Moderate, Recovery.                   | string     | — |
| **Band**           | EEG frequency band (delta, theta, alpha, beta, gamma).                      | string     | Defined by frequency bounds $(f_{min}, f_{max})$ |
| **mean_degree**    | Average weighted node degree — mean sum of connection weights per node.     | float      | $k_i = \sum_j w_{ij}$, $\langle k \rangle = \frac{1}{N} \sum_i k_i$ |
| **clustering**     | Weighted clustering coefficient — measures local interconnectedness.         | float      | $C = \frac{1}{N} \sum_i \frac{\sum_{j,k} (W^{1/3})_{ij}(W^{1/3})_{jk}(W^{1/3})_{ki}}{k_i(k_i - 1)}$ |
| **path_length**    | Characteristic path length — mean shortest path between all node pairs.     | float      | $L = \frac{1}{N(N-1)} \sum_{i \ne j} d_{ij}$, where $d_{ij}$ is the shortest-path distance using edge lengths $1 / w_{ij}$ |
| **small_worldness**| Small-worldness coefficient — ratio of normalized clustering to normalized path length. | float      | $\sigma = \frac{C / C_{rand}}{L / L_{rand}}$ |
| **Propofol_ugL**   | Propofol plasma concentration at recording time (µg/L).                     | float      | — |
| **RT_ms**          | Mean reaction time (ms) in behavioral task.                                 | float      | — |
| **Correct**        | Number of correct responses (max = 40).                                     | int        | — |
| **Responsiveness**      | Behavioral responsiveness classification: 1 = responsive, 0 = drowsy/unresponsive. Based on overlap of binomial confidence intervals between baseline and moderate sedation hit-rate distributions.         | int        | $R = 1 \text{ if } CI_{\text{mod}} \cap CI_{\text{base}} \neq \emptyset,\ 0 \text{ otherwise}$ |

In [6]:
import numpy as np
import pandas as pd
import mne
from mne_connectivity import spectral_connectivity_epochs
from pathlib import Path
from statsmodels.stats.proportion import proportion_confint
from tqdm import tqdm
import warnings
import sys

sys.path.append('../src')
from connectivity import compute_dwpli
from graph_metrics import compute_graph_metrics

# Suppress every warning for the rest of the session to keep cell output readable.
warnings.filterwarnings("ignore")

# Frequency bands to analyse, as name -> (fmin, fmax); presumably in Hz.
FREQ_BANDS = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
    gamma=(30, 45),
)

# The project root is taken to be the parent of the working directory
# (assumes the notebook is run from inside /notebooks).
PROJECT_ROOT = Path.cwd().parent

MANIFEST_PATH = Path("../data/data_derivatives/manifests/manifest.csv")
manifest = pd.read_csv(MANIFEST_PATH)

# Turn each manifest-relative SetPath into an absolute path under the project root.
manifest["SetPath"] = manifest["SetPath"].map(
    lambda rel_path: (PROJECT_ROOT / rel_path).resolve()
)

# Small two-subject slice of the manifest, kept around for quick test runs.
test_subjects = [1, 2]
subset_manifest = manifest.loc[manifest["Subject"].isin(test_subjects)]


In [None]:
# One output row is collected per (recording, frequency band).
records = []

# Ordinal encoding of the sedation stage, stored next to the text label.
# Labels missing from this map yield None in the SedationLevel column.
sedation_map = {
    "Baseline": 1,
    "Mild": 2,
    "Moderate": 3,
    "Recovery": 4,
}

# Iterate the manifest rows directly rather than indexing by position.
for _, row in manifest.iterrows():
    set_path = Path(row["SetPath"])

    # Skip recordings whose file is missing instead of aborting the whole run.
    if not set_path.exists():
        print(f"File not found: {set_path}")
        continue

    # Load EEG epochs; an unreadable file is reported and skipped.
    try:
        epochs = mne.io.read_epochs_eeglab(set_path, verbose="error")
    except Exception as e:
        print(f"Could not load {set_path.name}: {e}")
        continue

    # Loop over frequency bands; a failure in one band does not stop the others.
    for band, (fmin, fmax) in FREQ_BANDS.items():
        try:
            # Project helper; presumably returns a channel-by-channel dwPLI
            # connectivity matrix for the band — confirm in src/connectivity.py.
            con_matrix = compute_dwpli(epochs, fmin, fmax)

            # Reject matrices that are all-NaN or empty before computing metrics.
            if np.isnan(con_matrix).all() or con_matrix.shape[0] == 0:
                raise ValueError(f"Empty or NaN matrix in {band}")

            # Project helper returning a dict of metric name -> value
            # (it is unpacked into the record below).
            metrics = compute_graph_metrics(con_matrix, n_rand=10)

            record = {
                "Subject": row["Subject"],
                "SedationLabel": row["SedationLabel"],
                "SedationLevel": sedation_map.get(row["SedationLabel"]),
                "Band": band,
                **metrics,
                "Propofol_ugL": row["Propofol_ugL"],
                "RT_ms": row["RT_ms"],
                "Correct": row["Correct"],
            }
            records.append(record)

        except Exception as e:
            print(f"❌ Error in {row['BaseName']} ({band}): {type(e).__name__}: {e}")
            continue

# Assemble the per-band feature table and persist it for the later cells.
df_metrics = pd.DataFrame(records)
df_metrics.to_csv("../data/data_derivatives/features.csv", index=False)

In [None]:
import importlib
import sys

# Make the project's src/ directory importable from the notebook.
sys.path.append('../src')

import transformation

# Reload so edits to transformation.py take effect without restarting the kernel.
transformation = importlib.reload(transformation)
compute_band_ratios = transformation.compute_band_ratios

# Transformations: derive the band-ratio features from the per-band metrics.
# output_path is handed to the helper; presumably it writes the CSV there
# (confirm in src/transformation.py).
ratios_csv = "../data/data_derivatives/features_band_ratios.csv"
df_ratios = compute_band_ratios(df_metrics, output_path=ratios_csv)

df_ratios.head()

Unnamed: 0,Subject,SedationLevel,mean_degree_theta_alpha_ratio,mean_degree_theta_beta_ratio,mean_degree_alpha_beta_ratio,mean_degree_delta_alpha_ratio,clustering_theta_alpha_ratio,clustering_theta_beta_ratio,clustering_alpha_beta_ratio,clustering_delta_alpha_ratio,...,modularity_alpha_beta_ratio,modularity_delta_alpha_ratio,participation_coefficient_theta_alpha_ratio,participation_coefficient_theta_beta_ratio,participation_coefficient_alpha_beta_ratio,participation_coefficient_delta_alpha_ratio,small_worldness_theta_alpha_ratio,small_worldness_theta_beta_ratio,small_worldness_alpha_beta_ratio,small_worldness_delta_alpha_ratio
0,2,1,0.363835,1.706611,4.690617,0.230069,0.393301,1.078968,2.743363,0.207392,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201
1,2,2,0.210493,1.135297,5.393515,0.220949,0.141903,0.453421,3.195284,0.12282,...,0.418208,1.678315,0.974923,0.935394,0.959454,0.917907,0.869641,0.997367,1.146871,1.126771
2,2,3,0.235909,0.411764,1.745441,0.3738,0.164409,0.159703,0.971375,0.251851,...,0.530053,2.205645,0.926072,0.99858,1.078297,0.930935,0.896309,0.998967,1.114534,1.106752
3,2,4,0.288482,1.066094,3.695533,0.219857,0.262641,0.661918,2.520235,0.160744,...,0.407585,2.829673,1.052371,1.115686,1.060164,0.953419,0.824829,0.960508,1.164494,0.99013
4,3,1,0.36267,5.855636,16.145903,0.171322,0.430664,1.759742,4.08611,0.25852,...,0.5614,1.084415,0.982731,0.924986,0.941241,1.094073,0.991439,0.912973,0.920856,1.084244


In [None]:
# Attach the ratio features to every metrics row that shares the same
# subject and sedation level. The left join keeps all rows of df_metrics.
# NOTE(review): this assumes df_ratios has at most one row per
# (Subject, SedationLevel); duplicates there would multiply rows — confirm.
merge_keys = ["Subject", "SedationLevel"]
df_combined = df_metrics.merge(df_ratios, how="left", on=merge_keys)

# Persist the combined table so the result can be reproduced later.
combined_csv = "../data/data_derivatives/features_ratio_metrics.csv"
df_combined.to_csv(combined_csv, index=False)

df_combined.head()


✅ Combined features shape: (400, 47)


Unnamed: 0,Subject,SedationLabel,SedationLevel,Band,mean_degree,clustering,path_length,global_efficiency,local_efficiency,modularity,...,modularity_alpha_beta_ratio,modularity_delta_alpha_ratio,participation_coefficient_theta_alpha_ratio,participation_coefficient_theta_beta_ratio,participation_coefficient_alpha_beta_ratio,participation_coefficient_delta_alpha_ratio,small_worldness_theta_alpha_ratio,small_worldness_theta_beta_ratio,small_worldness_alpha_beta_ratio,small_worldness_delta_alpha_ratio
0,2,Baseline,1,delta,5.746849,0.07341,10.327662,0.931746,0.939725,0.097782,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201
1,2,Baseline,1,theta,9.088149,0.139216,8.207641,0.948107,0.956259,0.079196,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201
2,2,Baseline,1,alpha,24.978758,0.353968,3.55243,0.997436,0.997464,0.045914,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201
3,2,Baseline,1,beta,5.325261,0.129027,13.666625,0.973871,0.975702,0.136021,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201
4,2,Baseline,1,gamma,4.82243,0.070717,13.296375,0.951893,0.961439,0.151773,...,0.33755,2.129673,0.950674,1.032764,1.086349,0.978647,0.847146,1.017474,1.201061,1.020201


In [None]:
import importlib
import sys

# Make the project's src/ directory importable from the notebook.
sys.path.append('../src')

import normalization

# Reload so edits to normalization.py take effect without restarting the kernel.
normalization = importlib.reload(normalization)
normalize_within_subject = normalization.normalize_within_subject

# Normalize the per-band metrics within each subject, then save the result.
# (What the helper normalizes against is defined in src/normalization.py.)
df_delta = normalize_within_subject(df_metrics)
within_subject_csv = "../data/data_derivatives/features_within_subject.csv"
df_delta.to_csv(within_subject_csv, index=False)

    mean_degree_delta  clustering_delta  path_length_delta  \
2            0.000000          0.000000           0.000000   
7           -0.063636          0.066266           0.063774   
12          -0.335567         -0.362783           0.366614   
17          -0.050701         -0.017606           0.046465   
3            0.000000          0.000000           0.000000   

    global_efficiency_delta  local_efficiency_delta  modularity_delta  \
2                  0.000000                0.000000          0.000000   
7                  0.000734                0.000699          0.346046   
12                -0.010038               -0.009638          0.368215   
17                -0.000245               -0.000281          0.025137   
3                  0.000000                0.000000          0.000000   

    participation_coefficient_delta  small_worldness_delta  \
2                          0.000000               0.000000   
7                         -0.128140              -0.018987   
12