In [1]:
import numpy as np
import mne
from mne.minimum_norm import read_inverse_operator

# Load the inverse operator and the measurement info used for localization.
inv_operator = read_inverse_operator('inverse_operator-inv.fif', verbose=False)
info = mne.io.read_info("info.fif", verbose=False)

# Get the source space from the forward solution.
# src[0] is the left hemisphere, src[1] the right hemisphere.
fwd = mne.read_forward_solution('forward_solution-fwd.fif', verbose=False)
src = fwd['src']  # Source space object

# The ROI row indices derived from `src` are later used to index the output of
# apply_inverse_raw, which is computed from `inv_operator`. If the forward and
# inverse files were built on different source spaces, those indices would
# silently select the wrong rows, so fail loudly here instead.
inv_src = inv_operator['src']
if len(src) != len(inv_src) or any(
    not np.array_equal(fwd_hemi['vertno'], inv_hemi['vertno'])
    for fwd_hemi, inv_hemi in zip(src, inv_src)
):
    raise ValueError(
        "Source space of 'forward_solution-fwd.fif' does not match the one "
        "stored in 'inverse_operator-inv.fif'; ROI indices would be invalid."
    )

print("Source space structure:")
print(f"  LH vertices: {src[0]['nuse']}")
print(f"  RH vertices: {src[1]['nuse']}")
print(f"  Total: {src[0]['nuse'] + src[1]['nuse']}")

# Locate the fsaverage template (fetched on first use) and its parent
# directory, which serves as the subjects directory.
from pathlib import Path
from mne.datasets import fetch_fsaverage

subject = 'fsaverage'
fs_dir = fetch_fsaverage(verbose=False)
subjects_dir = Path(fs_dir).parent.as_posix()

# Read the 'aparc' parcellation labels for that subject.
# NOTE(review): presumably the source space above was also built on fsaverage,
# otherwise the label vertices would not line up with it — TODO confirm.
labels = mne.read_labels_from_annot(
    subject=subject,
    parc='aparc',
    subjects_dir=subjects_dir,
    verbose=False,
)

# Language-relevant ROIs, as '<region>-<hemisphere>' label names.
# Regions tried earlier and currently disabled: superiorfrontal, postcentral,
# inferiorparietal, middletemporal, inferiororbitalfrontal, parsorbitalis,
# rostralanteriorcingulate, caudalanteriorcingulate (plus the right-hemisphere
# precentral and supramarginal counterparts).
language_roi_names = [
    f"{region}-{hemi}"
    for hemi, regions in (
        # Left hemisphere - PRIMARY language areas
        ("lh", (
            "parsopercularis",   # Broca's area (BA 44)
            "parstriangularis",  # Broca's area (BA 45)
            "superiortemporal",  # Wernicke's area
            "supramarginal",     # Supramarginal gyrus - phonological processing
            "precentral",        # Motor cortex - speech production
        )),
        # Right hemisphere - prosody and supplementary
        ("rh", (
            "parsopercularis",
            "parstriangularis",
            "superiortemporal",
        )),
    )
    for region in regions
]

# Filter to language ROIs.
# Names are matched exactly rather than by substring, so a requested ROI can
# never pull in another label whose name merely contains it.
requested_roi_names = {name.lower() for name in language_roi_names}
language_labels = [label for label in labels if label.name.lower() in requested_roi_names]

# Report requested ROIs that the parcellation did not provide (e.g. a typo).
missing_roi_names = sorted(requested_roi_names - {label.name.lower() for label in language_labels})
if missing_roi_names:
    print(f"WARNING: requested ROIs not found in the parcellation: {missing_roi_names}")

print(f"\nFound {len(language_labels)} language ROI labels")

# Source counts come from the source space itself instead of a hard-coded 8196,
# so the checks below stay correct for any source-space resolution.
n_lh_sources = int(src[0]['nuse'])
n_sources = n_lh_sources + int(src[1]['nuse'])

# Map label vertices to source space indices
# Labels have vertices in full surface space; we need indices in our source space
roi_vertices = []

for label in language_labels:
    # Use the label's own hemisphere attribute rather than searching the name
    # for 'lh'/'rh', which would misfire on any name containing those letters.
    if label.hemi not in ('lh', 'rh'):
        raise ValueError(f"Unexpected hemisphere {label.hemi!r} for label {label.name!r}")
    is_right = label.hemi == 'rh'
    src_hemi = src[1] if is_right else src[0]

    # Vertex numbers (indices into the full surface) used by this hemisphere
    src_vertices = src_hemi['vertno']

    # Find intersection: which label vertices are in our source space?
    label_verts_in_src = np.intersect1d(label.vertices, src_vertices)

    # Convert to positions within src_vertices. Every queried value is present
    # in src_vertices (it came from the intersection), so searchsorted on the
    # sorted vertex list returns its exact position.
    indices = np.searchsorted(src_vertices, label_verts_in_src)

    # Right-hemisphere sources are offset by the number of left-hemisphere sources
    if is_right:
        indices = indices + n_lh_sources

    roi_vertices.extend(indices.tolist())
    print(f"  {label.name}: {len(label_verts_in_src)} vertices → indices")

roi_vertices = sorted(set(roi_vertices))

# min()/max() below would fail with an unhelpful message on an empty selection.
if not roi_vertices:
    raise ValueError("No source-space vertices fall inside the requested language ROIs")

print(f"\nTotal vertices in language ROIs: {len(roi_vertices)}")
print(f"Min index: {min(roi_vertices)}, Max index: {max(roi_vertices)}")
print(f"All indices in valid range [0, {n_sources - 1}]? {max(roi_vertices) < n_sources}")

if max(roi_vertices) >= n_sources:
    print("ERROR: Still have invalid indices!")
else:
    print("✓ All indices are valid!")

# Save ROI information
import json

# Record the real size of the source space instead of a hard-coded 8196, so the
# saved config stays truthful if the forward solution changes. int() keeps the
# value JSON-serializable even if 'nuse' is stored as a NumPy integer.
n_sources = int(src[0]['nuse']) + int(src[1]['nuse'])

with open('language_roi_config.json', 'w') as f:
    json.dump({
        'roi_vertices': roi_vertices,                              # row indices into the source estimate
        'roi_labels': [label.name for label in language_labels],   # names of the selected labels
        'n_roi_vertices': len(roi_vertices),
        'original_n_sources': n_sources,
    }, f, indent=2)

print("\nROI configuration saved to 'language_roi_config.json'")

def localize_and_extract_rois(data: np.ndarray, stop: int | None = None) -> np.ndarray:
    """
    Project sensor EEG into source space and keep only the language-ROI rows.

    Uses the module-level ``info``, ``inv_operator`` and ``roi_vertices``.

    Args:
        data: sensor EEG data, shape (n_channels, n_times)
        stop: end sample handed to the inverse solver (None = use all data)

    Returns:
        Source-localized data restricted to the language ROIs,
        shape (n_roi_vertices, n_times)
    """
    from mne.minimum_norm import apply_inverse_raw

    # Fixed inverse-solution settings: dSPM with lambda2 = 1/9, starting at sample 0.
    inverse_kwargs = dict(
        lambda2=1.0 / 9.0,
        method='dSPM',
        buffer_size=5000,
        start=0,
        stop=stop,
        verbose=False,
    )

    # Wrap the bare array so the inverse routine receives channel metadata.
    sensor_raw = mne.io.RawArray(data, info, verbose=False)
    source_estimate = apply_inverse_raw(sensor_raw, inv_operator, **inverse_kwargs)

    # Select the ROI rows; all time samples are kept.
    return source_estimate.data[roi_vertices, :]

Source space structure:
  LH vertices: 4098
  RH vertices: 4098
  Total: 8196

Found 8 language ROI labels
  parsopercularis-lh: 74 vertices → indices
  parsopercularis-rh: 56 vertices → indices
  parstriangularis-lh: 59 vertices → indices
  parstriangularis-rh: 65 vertices → indices
  precentral-lh: 263 vertices → indices
  superiortemporal-lh: 180 vertices → indices
  superiortemporal-rh: 168 vertices → indices
  supramarginal-lh: 211 vertices → indices

Total vertices in language ROIs: 1076
Min index: 0, Max index: 8186
All indices in valid range [0, 8195]? True
✓ All indices are valid!

ROI configuration saved to 'language_roi_config.json'


# Test: inspect a saved shard

In [2]:
import numpy as np

# Shard to inspect. The machine-specific absolute path lives in one named
# constant so it is easy to find and change on another machine.
# (An earlier copy of this shard lived under ".../shards/words/old/shard_0.npy".)
SHARD_PATH = "/mnt/D/University/Fall 2025/BCI/Project/shards/words/shard_0.npy"

# NOTE(security): allow_pickle=True unpickles Python objects stored in the file,
# which can execute arbitrary code. Only load shards from a trusted source.
data = np.load(SHARD_PATH, allow_pickle=True)

In [3]:
# Number of entries stored in the loaded shard.
len(data)

50

In [4]:
# Field names available on the first shard entry.
data[0].keys()

dict_keys(['eeg', 'localized_eeg', 'labels'])

In [5]:
# Number of items in the first entry's "localized_eeg" field.
len(data[0]["localized_eeg"])

26

In [6]:
# Shape of the first "localized_eeg" item of the first entry.
# NOTE(review): the first dimension displayed for this cell (3438) differs from
# the 1076 ROI vertices printed by the ROI-selection cell; presumably this shard
# was localized with a different ROI set — TODO confirm.
data[0]["localized_eeg"][0].shape

(3438, 120)

In [7]:
# Shape of the "localized_eeg" item at index 25 of the first entry (the last
# one, given the length of 26 displayed earlier). Comparing it with item 0 shows
# whether the second dimension varies between items.
data[0]["localized_eeg"][25].shape

(3438, 152)