# Process TDBRAIN
> Preprocessing and feature computation for the TDBRAIN dataset

In [1]:
import pandas as pd

from pathlib import Path
from functools import partial
from dask.distributed import Client, LocalCluster, fire_and_forget
from dask_jobqueue import LSFCluster
from fastcore.transform import Pipeline

import core.preprocessing as proc
from core.config import cfg

In [2]:
# Read the BIDS participants table and keep only the rows to be processed:
# replication samples are dropped and only session 1 is retained.
df_participants = (
    pd.read_csv(
        Path(cfg["DATASETS"]["TDBRAIN"]["bids_root"], 'participants.tsv'),
        sep='\t',
    )
    .query("indication != 'REPLICATION'")
    .query("sessID == 1")
)

# One participant ID per remaining row; every pipeline below iterates over these.
subjects = df_participants["participant_id"]

In [5]:
# Channel groups passed to the pipelines as `analyze_channels`.

# EEG channels, written out one label-prefix row at a time (Fp, F, FC, T/C, CP, P, O).
eeg_channels = (
    ['Fp1', 'Fp2']
    + ['F7', 'F3', 'Fz', 'F4', 'F8']
    + ['FC3', 'FCz', 'FC4']
    + ['T7', 'C3', 'Cz', 'C4', 'T8']
    + ['CP3', 'CPz', 'CP4']
    + ['P7', 'P3', 'Pz', 'P4', 'P8']
    + ['O1', 'Oz', 'O2']
)

# Auxiliary channels: the full set, followed by its ocular / non-ocular split.
# The full list is spelled out (not concatenated from the two subsets) because
# its element order differs from ocular + non-ocular.
aux_channels = [
    'VPVA', 'VNVB', 'HPHL', 'HNHR',
    'Erbs', 'OrbOcc', 'Mass',
]
aux_ocular_channels = [
    'VPVA', 'VNVB', 'HPHL', 'HNHR',
    'OrbOcc',
]
aux_non_ocular_channels = ['Erbs', 'Mass']

# EEG channels first, auxiliary channels appended after them.
aux_and_eeg_channels = [*eeg_channels, *aux_channels]

In [6]:
# Pre-bind the dataset-level arguments of proc.make_config that are identical
# for every TDBRAIN pipeline; each cell below only supplies the subject,
# the output root and the channels to analyze (plus optional extras).
make_tdbrain_config = partial(
    proc.make_config,
    bids_root=Path(cfg["DATASETS"]["TDBRAIN"]["bids_root"]),
    session='1',
    tasks=['restEC', 'restEO'],
    run=None,
    notch_filter_freq=50,
)

In [7]:
# Set up the Dask cluster and the client used to submit all pipeline runs.
#
# A LocalCluster is the default. If you have access to more computational
# resources, configure a different cluster instead, for example an LSFCluster:
#
# cluster = LSFCluster(
#     queue="short",
#     nanny=False,
#     maximum_jobs=1000,
# )

cluster = LocalCluster()
client = Client(cluster)

## Preprocessing and Feature Computation with Different Pipelines

In [10]:
# Minimal pipeline: no auto-reject and no ICA step.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/preproc_minimal.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'preproc_minimal')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, restricted to the EEG channels.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=eeg_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [8]:
# Autoreject pipeline: proc.compute_auto_reject runs after channel selection
# and before re-referencing.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/preproc_autoreject.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'preproc_autoreject')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, restricted to the EEG channels.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=eeg_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [9]:
# Autoreject + ICA pipeline: proc.compute_auto_reject followed by proc.apply_ica,
# both placed between channel selection and re-referencing.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/preproc_autoreject_ica.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'preproc_autoreject_ica')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, restricted to the EEG channels.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=eeg_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [10]:
# Aux channels (all): proc.map_artifact_ch_to_eeg is inserted right after the
# montage step, and this step list contains no proc.set_eeg_reference.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.map_artifact_ch_to_eeg,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/aux_channels.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'aux_channels')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, analyzing the full auxiliary channel list.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=aux_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [12]:
# Aux channels (ocular): proc.map_artifact_ch_to_eeg is inserted right after the
# montage step, and this step list contains no proc.set_eeg_reference.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.map_artifact_ch_to_eeg,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/aux_ocular_channels.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'aux_ocular_channels')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, analyzing only the ocular auxiliary channels.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=aux_ocular_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [13]:
# Aux channels (non-ocular): proc.map_artifact_ch_to_eeg is inserted right after
# the montage step, and this step list contains no proc.set_eeg_reference.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.map_artifact_ch_to_eeg,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/aux_non_ocular_channels.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'aux_non_ocular_channels')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, analyzing only the non-ocular auxiliary channels.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=aux_non_ocular_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [14]:
# AUX + EEG: EEG and auxiliary channels analyzed together.
#
# NOTE(review): unlike the aux-only cells in this notebook, this step list has
# no proc.map_artifact_ch_to_eeg and does apply proc.set_eeg_reference --
# confirm that this is intended for the auxiliary channels.
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/aux_and_eeg_channels.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'aux_and_eeg_channels')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, analyzing the combined EEG + auxiliary channel list.
configs = [
    make_tdbrain_config(subject, deriv_root=output_dir, analyze_channels=aux_and_eeg_channels)
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [15]:
# ICA artifact subspace (all): auto-reject + ICA step list, configured with
# ic_rejection_criterion="keep_artifacts".
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/ica_artifact_subspace.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'ica_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject on the EEG channels, with the IC criterion set explicitly.
configs = [
    make_tdbrain_config(
        subject,
        deriv_root=output_dir,
        analyze_channels=eeg_channels,
        ic_rejection_criterion="keep_artifacts",
    )
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [16]:
# ICA artifact subspace (ocular): auto-reject + ICA step list, configured with
# ic_rejection_criterion="keep_ocular_artifacts".
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/ica_ocular_artifact_subspace.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'ica_ocular_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject on the EEG channels, with the IC criterion set explicitly.
configs = [
    make_tdbrain_config(
        subject,
        deriv_root=output_dir,
        analyze_channels=eeg_channels,
        ic_rejection_criterion="keep_ocular_artifacts",
    )
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [17]:
# ICA artifact subspace (muscle): auto-reject + ICA step list, configured with
# ic_rejection_criterion="keep_muscle_artifacts".
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/ica_muscle_artifact_subspace.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'ica_muscle_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject on the EEG channels, with the IC criterion set explicitly.
configs = [
    make_tdbrain_config(
        subject,
        deriv_root=output_dir,
        analyze_channels=eeg_channels,
        ic_rejection_criterion="keep_muscle_artifacts",
    )
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))

In [18]:
# ICA artifact subspace (other): auto-reject + ICA step list, configured with
# ic_rejection_criterion="keep_other_artifacts".
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.set_montage,
    proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs,
    proc.compute_meeglet_features, proc.save_features,
])

# Derivatives for this pipeline go to <deriv_root>/ica_other_artifact_subspace.
output_dir = Path(cfg["DATASETS"]["TDBRAIN"]["deriv_root"], 'ica_other_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject on the EEG channels, with the IC criterion set explicitly.
configs = [
    make_tdbrain_config(
        subject,
        deriv_root=output_dir,
        analyze_channels=eeg_channels,
        ic_rejection_criterion="keep_other_artifacts",
    )
    for subject in subjects
]

# Submit one task per config; results are not collected in the notebook.
fire_and_forget(client.map(pipeline, configs))