In [1]:
import mne
import pandas as pd
import numpy as np
import os

def process_subject_epochs(sub_id, run_ids, raw_base_dir, stim_base_dir, out_base_dir):
    """
    切出每個 run 的 epoch + metadata，存檔（會自動 skip 缺檔案）
    
    sub_id: e.g. 'sub-01'
    run_ids: e.g. [1, 2, 3, 4, ...]
    raw_base_dir: 原始 raw 的資料夾
    stim_base_dir: stimulus table 的資料夾
    out_base_dir: 存檔資料夾
    """
    for run_id in run_ids:
        print(f'Processing {sub_id}, run {run_id}...')
        
        # 檢查 raw 檔案是否存在
        meg_path = f"{raw_base_dir}/{sub_id}/MEG/{sub_id}_task-RDR_run-{run_id}_meg.fif"
        if not os.path.exists(meg_path):
            print(f'⚠️ Skipping run {run_id}: raw file not found → {meg_path}')
            continue

        # 檢查 stim table 是否存在
        stim_path = f"{stim_base_dir}/story_{run_id}_stimulus_table.csv"
        if not os.path.exists(stim_path):
            print(f'⚠️ Skipping run {run_id}: stimulus table not found → {stim_path}')
            continue

        # === 讀 raw ===
        raw = mne.io.read_raw_fif(meg_path, preload=True)
        sfreq = raw.info['sfreq']

        # === 讀 stim csv 並過濾 POS ===
        df = pd.read_csv(stim_path)
        df = df[df['pos'].isin(['NN', 'VV'])].copy()
        if df.empty:
            print(f'⚠️ Skipping run {run_id}: no NN/VV entries in stimulus table.')
            continue

        # === 建 events ===
        event_samples = (df['word_onset_sec'] * sfreq).round().astype(int).to_numpy()
        event_ids = df['pos'].map({'NN': 1, 'VV': 2}).to_numpy()
        events = np.column_stack([event_samples, np.zeros_like(event_samples), event_ids])

        event_id = {'NN': 1, 'VV': 2}

        # === 切 epoch ===
        try:
            epochs = mne.Epochs(
                raw, events, event_id=event_id,
                tmin=-0.2, tmax=0.8,
                baseline=None,
                picks='meg',
                preload=True
            )
        except Exception as e:
            print(f'❌ Error processing run {run_id}: {e}')
            continue

        # === metadata dataframe ===
        meta_df = pd.DataFrame({
            'word': df['word'].values,
            'pos': df['pos'].values,
            'word_freq_log': df['word_freq_log'].values,
            'word_onset_sec': df['word_onset_sec'].values
        })

        # === 存檔 ===
        out_dir = f'{out_base_dir}/{sub_id}_run-{run_id}'
        os.makedirs(out_dir, exist_ok=True)

        epochs.save(f'{out_dir}/{sub_id}_run-{run_id}_epo.fif', overwrite=True)
        meta_df.to_csv(f'{out_dir}/{sub_id}_run-{run_id}_meta.csv', index=False)

        print(f'✅ {sub_id}, run {run_id} done.\n')


In [2]:
# 設定參數
sub_id = 'sub-01'
#run_ids = [1, 2, 3, 4]  # 你可以改成 list(range(1,61)) → 一鍵處理 60 runs
run_ids =list(range(1,61))
raw_base_dir = '../SMN4Lang_data/ds004078/derivatives/preprocessed_data'
stim_base_dir = 'StimulusTables'
out_base_dir = 'PreprocessedEpochs'

# 執行
process_subject_epochs(sub_id, run_ids, raw_base_dir, stim_base_dir, out_base_dir)


Processing sub-01, run 1...
Opening raw data file ../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-1_meg.fif...
    Range : 13000 ... 458999 =     13.000 ...   458.999 secs
Ready.
Reading 0 ... 445999  =      0.000 ...   445.999 secs...
Not setting metadata
411 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 411 events and 1001 original time points ...
20 bad epochs dropped
→ sub-01, run 1 done.

Processing sub-01, run 2...
Opening raw data file ../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-2_meg.fif...
    Range : 8000 ... 405999 =      8.000 ...   405.999 secs
Ready.
Reading 0 ... 397999  =      0.000 ...   397.999 secs...
Not setting metadata
381 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 381 events and 1001 original time points ...
10 bad epochs dropped
→ sub-01, ru

FileNotFoundError: fname does not exist: "/Users/meguser/Desktop/Project for Brainhack/domybest/../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-16_meg.fif"

In [4]:
sub_id = 'sub-01'
run_ids = list(range(16, 61))  # or any subset you want
raw_base_dir = '../SMN4Lang_data/ds004078/derivatives/preprocessed_data'
stim_base_dir = 'StimulusTables'
out_base_dir = 'PreprocessedEpochs'

process_subject_epochs(sub_id, run_ids, raw_base_dir, stim_base_dir, out_base_dir)


Processing sub-01, run 16...
⚠️ Skipping run 16: raw file not found → ../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-16_meg.fif
Processing sub-01, run 17...
Opening raw data file ../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-17_meg.fif...
    Range : 8000 ... 328999 =      8.000 ...   328.999 secs
Ready.
Reading 0 ... 320999  =      0.000 ...   320.999 secs...
Not setting metadata
355 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 355 events and 1001 original time points ...
11 bad epochs dropped
✅ sub-01, run 17 done.

Processing sub-01, run 18...
Opening raw data file ../SMN4Lang_data/ds004078/derivatives/preprocessed_data/sub-01/MEG/sub-01_task-RDR_run-18_meg.fif...
    Range : 7000 ... 314999 =      7.000 ...   314.999 secs
Ready.
Reading 0 ... 307999  =      0.000 ...   307.999 secs...
Not setting metadata
305 matching events found
N