# 1_behavior_eeg_preprocess

In [6]:
import os
import numpy as np
import pandas as pd
import scipy.io
import mne
from mne_bids import BIDSPath, write_raw_bids, make_dataset_description

import logging
import re

import matplotlib
matplotlib.use('Qt5Agg')

In [None]:
# =============================================================================
# Configuration and Constants
# =============================================================================

# Set up logging: messages will include timestamp, log level, and message content
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# === Path configuration ===
DS_ROOT_EEG = '/volumes/hyijie_psy/CPP/data_high_Sun_2023/Rawdata'
DS_ROOT_BEH = '/volumes/hyijie_psy/CPP/data_high_Sun_2023/behavior'
PATH_RAW_DATA_BIDS = '../data/data_raw_BIDS/Emotion'
PATH_PREPROCESS_DATA_BIDS = '../data/preprocessData/Emotion'

os.makedirs(PATH_RAW_DATA_BIDS, exist_ok=True)
os.makedirs(PATH_PREPROCESS_DATA_BIDS, exist_ok=True)

# === Constant definitions (for improved readability) ===
STIM_MARKER = '111'   # Stimulus onset marker
RESP1_MARKER = '101'  # Response marker 1
RESP2_MARKER = '102'  # Response marker 2

# === Subject lists ===
SUBJECT_IDS_EEG = [f'emotion_sub{i}' for i in range(1, 19)]
SUBJECT_IDS_BEH = [f'Emotion_decision_sub{i}' for i in range(1,19)]


In [8]:
# =============================================================================
# Exclude subjects with misaligned trial counts between behavior and EEG data
# =============================================================================
sub_excluded = []

for sub_eeg, sub_beh in zip(SUBJECT_IDS_EEG, SUBJECT_IDS_BEH):
    sub_idx = sub_beh.replace('Emotion_decision_', '')
    logger.info(f"Processing subject: {sub_idx}")

    # === step 1: Load eeg and behavior data ===
    # Set path for loading 
    path_eeg = os.path.join(DS_ROOT_EEG, sub_eeg+'.cnt')
    path_beh = os.path.join(DS_ROOT_BEH, sub_beh+'.dat')
    assert os.path.exists(path_eeg), f"EEG file not found: {path_eeg}"
    assert os.path.exists(path_beh), f"Behavior file not found: {path_beh}"

    # Load eeg and behavior data
    eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)
    data_behavior = pd.read_csv(path_beh, header=None)

    # === step 2: Align behavior and eeg data to get cleaned dataset ===
    # Find stimuli events to ensure that the behavior and eeg data are aligned
    n_behabior = len(data_behavior)
    
    events, events_id = mne.events_from_annotations(eeg_cnt)
    event_key_stim = None
    for key, value in events_id.items():
        if STIM_MARKER in key:
            event_key_stim = value

    # Validate consistency between number of stimulus events and behavioral trials
    n_events_stim = np.sum(events[:, 2] == event_key_stim)
    
    if n_events_stim != n_behabior:
        sub_excluded.append(sub_beh)
        print(f"Skipping subject {sub_idx}: n_stim_events count ({n_events_stim}) != behavior rows ({n_behabior})")
        continue
    
# === step 3: Save list of excluded subjects ===
if sub_excluded:
    excluded_sub_df = pd.DataFrame(sub_excluded, columns=['excluded_subjects'])
    excluded_sub_df.to_csv(
        os.path.join(PATH_RAW_DATA_BIDS, 'sub_excluded.csv'),
        header=False,
        index=False
    )
    logger.info(f"Excluded subjects (number): {len(sub_excluded)}")
else:
    logger.info("No subjects excluded.")

2025-12-08 22:33:17,388 - INFO - Processing subject: sub1


Reading 0 ... 1050399  =      0.000 ...  1050.399 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:33:17,891 - INFO - Processing subject: sub2


Skipping subject sub1: n_stim_events count (278) != behavior rows (280)
Reading 0 ... 1119599  =      0.000 ...  1119.599 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


KeyboardInterrupt: 

In [None]:
# =============================================================================
# Identify and index excluded trials:
#   (1) Trials missing response triggers in EEG event annotations
#   (2) RT outliers (> 3 SD)
# =============================================================================
data_invalid_trials_full = []
data_invalid_trials_eeg_full = []
# Load the index of excluded subjects
excluded_sub_df = pd.read_csv(os.path.join(PATH_RAW_DATA_BIDS, 'sub_excluded.csv'), header=None)
excluded_sub_set = set(excluded_sub_df.iloc[:, 0].tolist())

for sub_eeg, sub_beh in zip(SUBJECT_IDS_EEG, SUBJECT_IDS_BEH):
    if sub_beh in excluded_sub_set:
        continue
    sub_idx = sub_beh.replace('Emotion_decision_', '')
    logger.info(f"Processing subject: {sub_idx}")

    # === Step 1: Load behavioral and eeg data ===
    # Set path for loading 
    path_eeg = os.path.join(DS_ROOT_EEG, sub_eeg+'.cnt')
    path_beh = os.path.join(DS_ROOT_BEH, sub_beh+'.dat')
    assert os.path.exists(path_eeg), f"EEG file not found: {path_eeg}"
    assert os.path.exists(path_beh), f"Behavior file not found: {path_beh}"
    # Load eeg and behavior data
    eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)
    data_behavior = pd.read_csv(path_beh, header=None)

    # === Step 2: Select sepcific columns (aimed triggers) for behavior (eeg) data ===
    behavior_selected_cols = data_behavior.iloc[:, [6]].copy()
    behavior_selected_cols.columns = ['rt']

    # Select aimed triggers of eeg data
    events, events_id = mne.events_from_annotations(eeg_cnt)
    event_key_stim = None
    event_keys_resp = []
    event_keys_full = []
    for key, value in events_id.items():
        if STIM_MARKER in key:
            event_key_stim = value
        if any(trigger in key for trigger in [RESP1_MARKER, RESP2_MARKER]):
            event_keys_resp.append(value)
        if any(trigger in key for trigger in [STIM_MARKER, RESP1_MARKER, RESP2_MARKER]):
            event_keys_full.append(value)
    events_select = events[np.isin(events[:, 2], event_keys_full)]

    # === Step3: Index invalid trials===
    data_invalid_trial = set()
    data_invalid_trial_eeg = set()
    # The first process: lack of response triggers of EEG data
    index_events_stim = np.where(events_select[:, 2] == event_key_stim)[0]
    for i in range(events_select.shape[0] - 1):
        if (events_select[i, 2] == event_key_stim) and (events_select[i + 1, 2] == event_key_stim):
            index_invalid_trial = i
            # Compute the trial rank
            rank = int(np.sum(index_events_stim <= index_invalid_trial) -1) # 0-based for python index
            data_invalid_trial_eeg.add(rank)
    
    # The second process: trials with RT outliers
    # Remove invalid trials above in hebavioral data
    index_invalid_trials_eeg = sorted(data_invalid_trial_eeg)
    data_behabior_remove_invalid_eeg_trials = behavior_selected_cols.drop(index=index_invalid_trials_eeg).reset_index(drop=True)

    # Compute RT bounds (Â±3 SD)
    rt = data_behabior_remove_invalid_eeg_trials['rt']
    rt_mean = np.nanmean(rt)
    rt_std = np.nanstd(rt)
    rt_lower = rt_mean - 3 * rt_std
    rt_upper = rt_mean + 3 * rt_std

    # Valid trial mask
    mask_rt_outlier = (rt < rt_lower) | (rt > rt_upper)
    # Get original row indices of valid trials
    index_rt_outlier= data_behabior_remove_invalid_eeg_trials[mask_rt_outlier].index # 0-based for python index
    data_invalid_trial.update(int(x) for x in index_rt_outlier)

    # === Step 4: Create DataFrame for this subject ===
    if data_invalid_trial:
        df_invalid = pd.DataFrame({
            'subj_idx': sub_idx,
            'invalid_trial_index': sorted(data_invalid_trial)
        })
        data_invalid_trials_full.append(df_invalid)

        logger.info(f"Subject {sub_idx}: {len(data_invalid_trial)} invalid trials identified.")
    else:
        logger.info(f"Subject {sub_idx}: no invalid trials found.")

    if data_invalid_trial_eeg:
        df_invalid_eeg = pd.DataFrame({
            'subj_idx': sub_idx,
            'invalid_eeg_trial_index': index_invalid_trials_eeg
        })
        data_invalid_trials_eeg_full.append(df_invalid_eeg)

        logger.info(f"Subject {sub_idx}: {len(df_invalid_eeg)} invalid trials identified.")
    else:
        logger.info(f"Subject {sub_idx}: no invalid trials found.")
    
# === Save all invalid trials ===
if data_invalid_trials_full:
    df_all_invalid = pd.concat(data_invalid_trials_full, ignore_index=True)
    df_all_invalid.to_csv(
        os.path.join(PATH_RAW_DATA_BIDS, 'all_invalid_trials.csv'),
        index=False
    )
    logger.info(f"Saved invalid trials for {len(df_all_invalid)} trials.")
else:
    logger.info("No invalid trials found across all subjects.")

if data_invalid_trials_eeg_full:
    df_all_invalid_eeg = pd.concat(data_invalid_trials_eeg_full, ignore_index=True)
    df_all_invalid_eeg.to_csv(
        os.path.join(PATH_RAW_DATA_BIDS, 'all_invalid_eeg_trials.csv'),
        index=False
    )
    logger.info(f"Saved invalid eeg trials for {len(df_all_invalid_eeg)} trials.")
else:
    logger.info("No invalid trials found across all subjects.")



2025-12-08 22:32:17,220 - INFO - Processing subject: sub2


Reading 0 ... 1119599  =      0.000 ...  1119.599 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('124'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:17,661 - INFO - Subject sub2: 3 invalid trials identified.
2025-12-08 22:32:17,661 - INFO - Subject sub2: 6 invalid trials identified.
2025-12-08 22:32:17,661 - INFO - Processing subject: sub3


Reading 0 ... 1238319  =      0.000 ...  1238.319 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('124'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:18,181 - INFO - Subject sub3: no invalid trials found.
2025-12-08 22:32:18,181 - INFO - Subject sub3: 29 invalid trials identified.
2025-12-08 22:32:18,182 - INFO - Processing subject: sub4


Reading 0 ... 1083839  =      0.000 ...  1083.839 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:18,658 - INFO - Subject sub4: no invalid trials found.
2025-12-08 22:32:18,658 - INFO - Subject sub4: 8 invalid trials identified.
2025-12-08 22:32:18,658 - INFO - Processing subject: sub6


Reading 0 ... 1161999  =      0.000 ...  1161.999 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('124'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:19,163 - INFO - Subject sub6: no invalid trials found.
2025-12-08 22:32:19,163 - INFO - Subject sub6: 9 invalid trials identified.
2025-12-08 22:32:19,163 - INFO - Processing subject: sub7


Reading 0 ... 1001799  =      0.000 ...  1001.799 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:19,615 - INFO - Subject sub7: 10 invalid trials identified.
2025-12-08 22:32:19,615 - INFO - Subject sub7: 8 invalid trials identified.
2025-12-08 22:32:19,616 - INFO - Processing subject: sub8


Reading 0 ... 1228519  =      0.000 ...  1228.519 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('124'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:20,131 - INFO - Subject sub8: no invalid trials found.
2025-12-08 22:32:20,131 - INFO - Subject sub8: 36 invalid trials identified.
2025-12-08 22:32:20,131 - INFO - Processing subject: sub9


Reading 0 ... 1100399  =      0.000 ...  1100.399 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:20,611 - INFO - Subject sub9: 2 invalid trials identified.
2025-12-08 22:32:20,611 - INFO - Subject sub9: 2 invalid trials identified.
2025-12-08 22:32:20,612 - INFO - Processing subject: sub10


Reading 0 ... 1121519  =      0.000 ...  1121.519 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:21,090 - INFO - Subject sub10: 2 invalid trials identified.
2025-12-08 22:32:21,090 - INFO - Subject sub10: 9 invalid trials identified.
2025-12-08 22:32:21,091 - INFO - Processing subject: sub11


Reading 0 ... 1215879  =      0.000 ...  1215.879 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:21,605 - INFO - Subject sub11: no invalid trials found.
2025-12-08 22:32:21,605 - INFO - Subject sub11: 5 invalid trials identified.
2025-12-08 22:32:21,605 - INFO - Processing subject: sub14


Reading 0 ... 1380359  =      0.000 ...  1380.359 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:22,177 - INFO - Subject sub14: 6 invalid trials identified.
2025-12-08 22:32:22,177 - INFO - Subject sub14: 4 invalid trials identified.
2025-12-08 22:32:22,178 - INFO - Processing subject: sub15


Reading 0 ... 1099599  =      0.000 ...  1099.599 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('124'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:22,643 - INFO - Subject sub15: no invalid trials found.
2025-12-08 22:32:22,643 - INFO - Subject sub15: 14 invalid trials identified.
2025-12-08 22:32:22,644 - INFO - Processing subject: sub16


Reading 0 ... 1164079  =      0.000 ...  1164.079 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:23,135 - INFO - Subject sub16: 2 invalid trials identified.
2025-12-08 22:32:23,136 - INFO - Subject sub16: 6 invalid trials identified.
2025-12-08 22:32:23,136 - INFO - Processing subject: sub17


Reading 0 ... 1194879  =      0.000 ...  1194.879 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:23,645 - INFO - Subject sub17: 3 invalid trials identified.
2025-12-08 22:32:23,645 - INFO - Subject sub17: 4 invalid trials identified.
2025-12-08 22:32:23,645 - INFO - Processing subject: sub18


Reading 0 ... 1092959  =      0.000 ...  1092.959 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Used Annotations descriptions: [np.str_('101'), np.str_('102'), np.str_('103'), np.str_('11'), np.str_('111'), np.str_('112'), np.str_('12'), np.str_('121'), np.str_('122'), np.str_('123'), np.str_('13'), np.str_('14'), np.str_('15'), np.str_('16'), np.str_('17'), np.str_('21'), np.str_('22'), np.str_('23'), np.str_('24'), np.str_('25'), np.str_('26'), np.str_('27')]


2025-12-08 22:32:24,124 - INFO - Subject sub18: 4 invalid trials identified.
2025-12-08 22:32:24,125 - INFO - Subject sub18: 6 invalid trials identified.
2025-12-08 22:32:24,127 - INFO - Saved invalid trials for 32 trials.
2025-12-08 22:32:24,128 - INFO - Saved invalid eeg trials for 146 trials.


In [None]:
# =============================================================================
# EEG preprocess
# =============================================================================
# Load the index of excluded subjects
excluded_sub_df = pd.read_csv(os.path.join(PATH_RAW_DATA_BIDS, 'sub_excluded.csv'), header=None)
excluded_sub_set = set(excluded_sub_df.iloc[:, 0].tolist())
bad_chs_full = []

for sub_eeg, sub_beh in zip(SUBJECT_IDS_EEG, SUBJECT_IDS_BEH):
    if sub_beh in excluded_sub_set:
        continue
    sub_idx = sub_beh.replace('emotion_decision_', '')
    logger.info(f"Processing subject: {sub_idx}")

    # === Step 1: Load eeg data ===
    # Set path for loading 
    path_eeg = os.path.join(DS_ROOT_EEG, sub_eeg+'.cnt')
    assert os.path.exists(path_eeg), f"EEG file not found: {path_eeg}"
    # Load eeg data
    eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)

    # === Step 2: Main content of preprocessing ===
    # == 2.1 Set channel type to recgnize ==
    eeg_preprocess = eeg_cnt.copy().set_channel_types({'HEOG': 'eog', 'VEOG': 'eog'})

    # == 2.2 Resample ==
    eeg_preprocess.resample(256, npad="auto")
    
    # == 2.3 Filtering ==
    eeg_preprocess.filter(1, 30, fir_design='firwin', picks=['eeg'])

    # == 2.4 re-reference ==
    eeg_preprocess.set_eeg_reference('average')  

    # == 2.5 Indentify bad channels by visual inspection==
    # Plot PSD to check first
    eeg_preprocess.compute_psd(fmax=100).plot(picks="data", exclude="bads", amplitude=False)
    # Plot EEG data
    eeg_preprocess.plot(n_channels = 31,  block=True) #scalings = 20e-6,
    # Record bad channels
    bad_chans = eeg_preprocess.info['bads']
    if len(bad_chans) == 0:
        # No bad channels: add a raw, bad_channel is NaN
        bad_chs_full.append({
            "subj_idx": sub_idx,
            "bad_channel": pd.NA
        })
    else:
        # hava bad channels
        for bad_chan in bad_chans:
            bad_chs_full.append({
                "subj_idx": sub_idx,
                "bad_channel": bad_chan
            })

    # == 2.7 Remove artifacts using ICA ==
    # ica remove artifact
    eeg_chs = mne.pick_types(eeg_preprocess.info, eeg=True, exclude='bads') 
    n_eeg_good = len(eeg_chs)
    ica = mne.preprocessing.ICA(n_components=n_eeg_good, random_state=42, method='fastica')
    ica.fit(eeg_preprocess) 
    ica.exclude = []                                   
    eog_indices, eog_scores = ica.find_bads_eog(eeg_preprocess)                                                                     
    ica.exclude = eog_indices
    ica.apply(eeg_preprocess) 

    # == 2.8 Plot again to check ==
    eeg_preprocess.plot(n_channels = 31,  block=True)

    # === Step 3: Save preprocessed data into BIDS ===
    bids_path = BIDSPath(
        subject=sub_eeg.replace('emotion_sub', ''),
        task='emotionJudgement',
        datatype='eeg',
        root=PATH_PREPROCESS_DATA_BIDS
    )

    write_raw_bids(
        raw=eeg_preprocess,
        bids_path=bids_path,
        format='BrainVision',
        allow_preload=True,
        overwrite=True,
        verbose=False
    )

# Save full bad channels cross subjects
bad_chs_full = pd.DataFrame(bad_chs_full)
bad_chs_full.to_csv(
        os.path.join(PATH_PREPROCESS_DATA_BIDS, 'all_bad_channels.csv'),
        index=False
    )

2025-12-08 23:19:35,077 - INFO - Processing subject: Emotion_decision_sub2


Reading 0 ... 1119599  =      0.000 ...  1119.599 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
[np.str_('FP2'), np.str_('FP1'), np.str_('F7'), np.str_('FCZ'), np.str_('FC3'), np.str_('FT7'), np.str_('F4'), np.str_('F8'), np.str_('FZ'), np.str_('F3')]
Fitting ICA to data using 21 channels (please be patient, this may take 

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:20:01,678 - INFO - Processing subject: Emotion_decision_sub3


Reading 0 ... 1238319  =      0.000 ...  1238.319 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.2s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:20:13,720 - INFO - Processing subject: Emotion_decision_sub4


Reading 0 ... 1083839  =      0.000 ...  1083.839 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 4.8s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:20:28,473 - INFO - Processing subject: Emotion_decision_sub6


Reading 0 ... 1161999  =      0.000 ...  1161.999 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 3.2s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:20:47,821 - INFO - Processing subject: Emotion_decision_sub7


Reading 0 ... 1001799  =      0.000 ...  1001.799 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.5s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:20:59,722 - INFO - Processing subject: Emotion_decision_sub8


Reading 0 ... 1228519  =      0.000 ...  1228.519 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 3.6s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:21:12,565 - INFO - Processing subject: Emotion_decision_sub9


Reading 0 ... 1100399  =      0.000 ...  1100.399 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.5s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:21:24,339 - INFO - Processing subject: Emotion_decision_sub10


Reading 0 ... 1121519  =      0.000 ...  1121.519 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.3s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:21:36,157 - INFO - Processing subject: Emotion_decision_sub11


Reading 0 ... 1215879  =      0.000 ...  1215.879 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 1.8s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:21:48,090 - INFO - Processing subject: Emotion_decision_sub14


Reading 0 ... 1380359  =      0.000 ...  1380.359 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 3.5s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:22:01,481 - INFO - Processing subject: Emotion_decision_sub15


Reading 0 ... 1099599  =      0.000 ...  1099.599 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.4s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:22:14,439 - INFO - Processing subject: Emotion_decision_sub16


Reading 0 ... 1164079  =      0.000 ...  1164.079 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 2.6s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:22:28,826 - INFO - Processing subject: Emotion_decision_sub17


Reading 0 ... 1194879  =      0.000 ...  1194.879 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 4.5s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(
2025-12-08 23:22:43,863 - INFO - Processing subject: Emotion_decision_sub18


Reading 0 ... 1092959  =      0.000 ...  1092.959 secs...


  eeg_cnt = mne.io.read_raw_cnt(path_eeg,preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 845 samples (3.301 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Effective window size : 8.000 (s)
Plotting power spectral density (dB=True).
Channels marked as bad:
none
Fitting ICA to data using 31 channels (please be patient, this may take a while)
Selecting by number: 31 components
Fitting ICA took 1.6s.
Using EOG channels: HEOG, VEOG
... filtering ICA sources
Setting up band-pass filter

  ica.fit(eeg_cnt_re_reference)


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 2560 sam

  write_raw_bids(
  write_raw_bids(


In [None]:
# =============================================================================
# Save cleaned behavior data into BIDS
# =============================================================================
# Load the index of excluded subjects
data_invalid_trials_full = pd.read_csv(os.path.join(PATH_RAW_DATA_BIDS, 'all_invalid_trials.csv'), header=0)
data_invalid_eeg_trials_full = pd.read_csv(os.path.join(PATH_RAW_DATA_BIDS, 'all_invalid_eeg_trials.csv'), header=0)

excluded_sub_df = pd.read_csv(os.path.join(PATH_RAW_DATA_BIDS, 'sub_excluded.csv'), header=None)
excluded_sub_set = set(excluded_sub_df.iloc[:, 0].tolist())

for sub_beh in SUBJECT_IDS_BEH:
    if sub_beh in excluded_sub_set:
        continue
    sub_idx = sub_beh.replace('Emotion_decision_', '')

    # Extract invalid trials. Note that they should be dropped step by step 
    data_invalid_eeg_trial = data_invalid_eeg_trials_full[data_invalid_eeg_trials_full['subj_idx'] == sub_idx]
    invalid_from_eeg = set(data_invalid_eeg_trial['invalid_eeg_trial_index'])
    index_invalid_from_eeg = sorted(invalid_from_eeg)

    data_invalid_trial = data_invalid_trials_full[data_invalid_trials_full['subj_idx'] == sub_idx]
    invalid_from_behavior = set(data_invalid_trial['invalid_trial_index'])
    index_invalid_from_behavior = sorted(invalid_from_behavior)

    logger.info(f"Processing subject: {sub_idx}")

    # === Step 1: Load behavioral and eeg data ===
    # Set path for loading 
    path_beh = os.path.join(DS_ROOT_BEH, sub_beh+'.dat')
    assert os.path.exists(path_beh), f"Behavior file not found: {path_beh}"

    # Load eeg and behavior data
    data_behavior = pd.read_csv(path_beh, header=None)

    # === Step 2: Select sepcific columns (aimed triggers) for behavior (eeg) data ===
    # Select specific columns: [15, 0, 6, 8] â [subj_idx, difficulty, rt, response]
    data_behavior['subj_idx'] = sub_idx           # Add a column to get subject idx
    behavior_selected_cols = data_behavior.iloc[:, [15, 0, 6, 8]].copy()
    behavior_selected_cols.columns = ['subj_idx', 'difficulty', 'rt', 'response']
    
    # === Step 3: Filter and save cleaned behavioral data ===
    # Note that it should drop 'index_invalid_from_eeg' first
    data_behavior_valid_eeg= behavior_selected_cols.drop(index=index_invalid_from_eeg).reset_index(drop=True)
    data_behavior_clean= data_behavior_valid_eeg.drop(index=index_invalid_from_behavior).reset_index(drop=True)

    # Save cleaned behavioral data into BIDS
    bids_path_beh = os.path.join(PATH_PREPROCESS_DATA_BIDS, f"sub-{sub_beh.replace('Emotion_decision_sub', '')}", 'beh')
    os.makedirs(bids_path_beh, exist_ok=True)
    beh_save_path = os.path.join(bids_path_beh, f"sub-{sub_beh.replace('Emotion_decision_sub', '')}_task-emotionJudgement_beh.tsv")
    data_behavior_clean.to_csv(beh_save_path, sep='\t', index=False, na_rep='n/a')

    logger.info(f"Subject {sub_idx}: BIDS conversion completed.")  



2025-12-06 13:51:49,964 - INFO - Processing subject: sub2
2025-12-06 13:51:49,968 - INFO - Subject sub2: BIDS conversion completed.
2025-12-06 13:51:49,969 - INFO - Processing subject: sub3
2025-12-06 13:51:49,972 - INFO - Subject sub3: BIDS conversion completed.
2025-12-06 13:51:49,973 - INFO - Processing subject: sub4
2025-12-06 13:51:49,976 - INFO - Subject sub4: BIDS conversion completed.
2025-12-06 13:51:49,977 - INFO - Processing subject: sub6
2025-12-06 13:51:49,980 - INFO - Subject sub6: BIDS conversion completed.
2025-12-06 13:51:49,980 - INFO - Processing subject: sub7
2025-12-06 13:51:49,985 - INFO - Subject sub7: BIDS conversion completed.
2025-12-06 13:51:49,986 - INFO - Processing subject: sub8
2025-12-06 13:51:49,988 - INFO - Subject sub8: BIDS conversion completed.
2025-12-06 13:51:49,989 - INFO - Processing subject: sub9
2025-12-06 13:51:49,991 - INFO - Subject sub9: BIDS conversion completed.
2025-12-06 13:51:49,991 - INFO - Processing subject: sub10
2025-12-06 13:51: