In [1]:
from pathlib import Path
from dask.distributed import Client, LocalCluster, fire_and_forget

from core.config import cfg
from core.benchmark import create_benchmark_configs, run_benchmark_cv

In [2]:
def compute_benchmark(benchmark, output_root=None, sample=None, n_splits=10):
    """
    Executes a given benchmark and saves the results in csv format if the output_root argument is specified.
    The results are saved according to the following folder structure: output_root/dataset/processing/target/features/model.

    Parameters
    ----------
    benchmark
        Benchmark config. The attributes ``dataset``, ``processing``, ``target``,
        ``features`` and ``model`` are read from it to build the output folder.
    output_root : str or path-like, optional
        Root folder for the csv files. If None (default), nothing is written to disk.
    sample : optional
        Forwarded unchanged to ``run_benchmark_cv``.
    n_splits : int, optional
        Forwarded to ``run_benchmark_cv`` as ``n_splits`` (default: 10).

    Returns
    -------
    tuple
        The ``(result, ys)`` pair returned by ``run_benchmark_cv``.
    """

    # The dataset name is compared case-insensitively: the configs in this
    # notebook are created with upper-case names ("TDBRAIN"), so an exact match
    # against "tdbrain" would silently disable the group column.
    is_tdbrain = str(benchmark.dataset).lower() == "tdbrain"
    group_column = 'diagnosis_code' if is_tdbrain else None

    result, ys = run_benchmark_cv(benchmark, n_splits=n_splits, sample=sample, group_column=group_column)

    if output_root is not None:  # save results
        save_dir = Path(output_root) / benchmark.dataset / benchmark.processing / benchmark.target / benchmark.features / benchmark.model
        save_dir.mkdir(parents=True, exist_ok=True)
        result.to_csv(save_dir / 'results.csv')
        ys.to_csv(save_dir / 'ys.csv')

    return result, ys


In [3]:
# Set up the Dask cluster
# Note: A LocalCluster is used here. Feel free to configure your own dask
# cluster instead if you have access to more computational resources,
# for example an LSFCluster with multiple GPUs.

cluster = LocalCluster()
client = Client(cluster)

# Flag read when submitting jobs: if True, a GPU is requested for benchmarks
# operating on raw data.
gpu_available = False


In [13]:
# Create benchmark configs

def _configs_for(datasets, processings):
    """Create configs for the given datasets and processings.

    Models, features and targets are the same for every benchmark group in
    this notebook, so they are spelled out once here. Fresh lists are built on
    every call.
    """
    return create_benchmark_configs(
        datasets=datasets,
        models=["naive_pca", "log_diag_pca", "spoc_log", "riemann", "shallow"],
        features=["meeglet", "raw"],
        processings=processings,
        targets=["age", "sex"],
    )


# Preprocessing level benchmarks
benchmarks = _configs_for(
    ["TDBRAIN", "TUAB"],
    ["preproc_minimal", "preproc_autoreject", "preproc_autoreject_ica"],
)

# Aux channel benchmarks (TDBRAIN only)
benchmarks += _configs_for(
    ["TDBRAIN"],
    ["aux_and_eeg_channels", "aux_channels", "aux_ocular_channels", "aux_non_oculuar_channels"],
)

# ICA subspace benchmarks
benchmarks += _configs_for(
    ["TDBRAIN", "TUAB"],
    ["ica_artifact_subspace", "ica_ocular_artifact_subspace", "ica_muscle_artifact_subspace", "ica_other_artifact_subspace"],
)

In [None]:
# Note: In version 3.0.1 of the TUAB dataset, the age of subjects with age >= 90 has been set to 999.
# If you are using this version of TUAB, the following should be used to exclude these subjects for
# the age prediction task.
#
# from fastcore.transform import Transform, Pipeline
# from copy import deepcopy
#
# for benchmark in benchmarks:
#     if (benchmark.dataset == "TUAB") and (benchmark.target == "age"):
#         assert isinstance(benchmark.filter_func, Pipeline)
#         new_filter_func = deepcopy(benchmark.filter_func)
#         new_filter_func.add(Transform(lambda df: df[df["age"] < 90]))
#         benchmark.filter_func = new_filter_func

In [5]:
# Submit every benchmark to the cluster without keeping the futures around
for bm in benchmarks:
    submit_kwargs = {}
    if gpu_available and bm.features == "raw":
        # request GPU for models operating on raw data
        submit_kwargs["resources"] = {'GPU': 1}
    future = client.submit(
        compute_benchmark,
        bm,
        output_root=cfg["RESULTS"]["results_path"],
        **submit_kwargs,
    )
    fire_and_forget(future)

In [None]:
# Show the client as the output of this cell.
# The client dashboard can be used to monitor the progress of the submitted jobs.
client