# Process TUAB
> Preprocessing and feature computation for the TUAB dataset

In [1]:
import pandas as pd

from pathlib import Path
from functools import partial
from dask.distributed import Client, LocalCluster, fire_and_forget
from dask_jobqueue import LSFCluster
from fastcore.transform import Pipeline

import core.preprocessing as proc
from core.config import cfg

In [2]:
# Base participant table shipped with the TUAB BIDS dataset
df_participants = pd.read_csv(
    Path(cfg["DATASETS"]["TUAB"]["bids_root"]) / 'participants.tsv',
    sep='\t',
)

# Extra participant information: the first CSV column is dropped and
# participant_id is rewritten as 'sub-' + id zero-padded to 4 characters,
# so that it can be used as the merge key below
df_participants_extra = (
    pd.read_csv(Path(cfg["DATASETS"]["TUAB"]["bids_root"]) / 'participants_extra.csv')
    .iloc[:, 1:]
    .assign(participant_id=lambda extra: 'sub-' + extra.participant_id.astype(str).str.zfill(4))
)

# Keep session-1 rows of the extra table, merge them with the base table,
# then drop pathological samples and a fixed list of bad subjects
df_participants = (
    df_participants_extra
    .query('session == 1')
    .reset_index(drop=True)
    .merge(df_participants, on='participant_id')
    .query("not pathological")
    .loc[lambda merged: ~merged.participant_id.isin(['sub-0388', 'sub-0766', 'sub-2289'])]
)

# Subject ids that every pipeline below is run on
subjects = df_participants['participant_id']

In [3]:
# Channel labels handed to the preprocessing configs as `analyze_channels`
eeg_channels = [
    'Fp1', 'Fp2',
    'F3', 'F4',
    'C3', 'C4',
    'P3', 'P4',
    'O1', 'O2',
    'F7', 'F8',
    'T3', 'T4',
    'T5', 'T6',
    'Fz', 'Cz', 'Pz',
    'A1', 'A2',
]

In [4]:
# Config factory with the TUAB-wide settings pre-filled; each pipeline cell
# only supplies the subject and its own `deriv_root` (plus optional overrides)
make_tuab_config = partial(
    proc.make_config,
    bids_root=Path(cfg["DATASETS"]["TUAB"]["bids_root"]),
    session='001',
    run='001',
    tasks=['rest'],
    notch_filter_freq=60,
    analyze_channels=eeg_channels,
)

In [6]:
# Set up the Dask cluster that the pipeline cells below submit their work to.
#
# Note: Feel free to configure your own Dask cluster if you have access
# to more computational resources. For example, an LSFCluster:
#
# cluster = LSFCluster(
#     queue="short",
#     nanny=False,
#     maximum_jobs=1000,
# )

# Default: a LocalCluster created with default arguments
cluster = LocalCluster()
# `client` is the handle the later cells use to submit tasks (client.map)
client = Client(cluster)

## Preprocessing and Feature Computation with Different Pipelines

In [6]:
# Minimal pipeline: no autoreject and no ICA step
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('preproc_minimal')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject
configs = [
    make_tuab_config(subject_id, deriv_root=output_dir)
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [8]:
# Autoreject pipeline: proc.compute_auto_reject runs on the channel-selected
# epochs, before proc.set_eeg_reference
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('preproc_autoreject')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject
configs = [
    make_tuab_config(subject_id, deriv_root=output_dir)
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [9]:
# Autoreject + ICA pipeline: proc.compute_auto_reject followed by
# proc.apply_ica, both before proc.set_eeg_reference
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('preproc_autoreject_ica')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject (no `ic_rejection_criterion` override here)
configs = [
    make_tuab_config(subject_id, deriv_root=output_dir)
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [10]:
# Artifact ICA subspace (all): autoreject + ICA steps, with the configs
# setting ic_rejection_criterion="keep_artifacts"
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('ica_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, overriding the IC rejection criterion
configs = [
    make_tuab_config(
        subject_id,
        deriv_root=output_dir,
        ic_rejection_criterion="keep_artifacts",
    )
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [11]:
# Artifact ICA subspace (ocular): autoreject + ICA steps, with the configs
# setting ic_rejection_criterion="keep_ocular_artifacts"
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('ica_ocular_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, overriding the IC rejection criterion
configs = [
    make_tuab_config(
        subject_id,
        deriv_root=output_dir,
        ic_rejection_criterion="keep_ocular_artifacts",
    )
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [12]:
# Artifact ICA subspace (muscle): autoreject + ICA steps, with the configs
# setting ic_rejection_criterion="keep_muscle_artifacts"
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('ica_muscle_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, overriding the IC rejection criterion
configs = [
    make_tuab_config(
        subject_id,
        deriv_root=output_dir,
        ic_rejection_criterion="keep_muscle_artifacts",
    )
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))

In [13]:
# Artifact ICA subspace (other): autoreject + ICA steps, with the configs
# setting ic_rejection_criterion="keep_other_artifacts"
pipeline = Pipeline([
    proc.read_raw_bids_root, proc.crop, proc.filter, proc.resample,
    proc.make_epochs, proc.select_channels,
    proc.compute_auto_reject, proc.apply_ica,
    proc.set_eeg_reference,
    proc.save_epochs, proc.compute_meeglet_features, proc.save_features,
])

# Folder for this pipeline; passed to every config as `deriv_root`
output_dir = Path(cfg["DATASETS"]["TUAB"]["deriv_root"]).joinpath('ica_other_artifact_subspace')
output_dir.mkdir(parents=True, exist_ok=True)

# One config per subject, overriding the IC rejection criterion
configs = [
    make_tuab_config(
        subject_id,
        deriv_root=output_dir,
        ic_rejection_criterion="keep_other_artifacts",
    )
    for subject_id in subjects
]

# Submit one task per config without keeping the futures in the notebook
fire_and_forget(client.map(pipeline, configs))