From 2938fcc6bd45a7ac3a56c8638b0721c33aac2720 Mon Sep 17 00:00:00 2001 From: Bru Date: Fri, 2 Jun 2023 16:17:59 +0200 Subject: [PATCH] Update dataset info (#389) * Fixing data meta info * Fixing description Cho and BNCI * Fixing order * Fixing the SSVEP and P300 * Adding new script * Fixing saving * Updating script * Fixing columns * Updating the script to process trial/events * Updating the whats_new.rst * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update moabb/datasets/gigadb.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update 
scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Fixing small things --------- Co-authored-by: Sylvain Chevallier --- docs/source/dataset_summary.rst | 46 ++++---- docs/source/whats_new.rst | 2 + moabb/datasets/Lee2019.py | 2 +- moabb/datasets/bnci.py | 10 +- moabb/datasets/braininvaders.py | 6 +- moabb/datasets/gigadb.py | 2 +- moabb/datasets/ssvep_wang.py | 2 +- scripts/generating_metainfo.py | 194 ++++++++++++++++++++++++++++++++ 8 files changed, 230 insertions(+), 34 deletions(-) create mode 100644 scripts/generating_metainfo.py diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 5e80f2bae..3b47ad898 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -17,25 +17,25 @@ Motor Imagery ====================== .. csv-table:: - :header: Dataset, #Subj, #Chan, #Classes, #Trials, len, Sampling rate, #Sessions, #Trials*#Sessions + :header: Dataset, #Subj, #Chan, #Classes, #Trials, Trial length, Freq, #Session, #Runs, Total_trials :class: sortable - AlexMI,8,16,3,20,3s,512Hz,1,20 - BNCI2014001,10,22,4,144,4s,250Hz,2,288 - BNCI2014002,15,15,2,80,5s,512Hz,1,80 - BNCI2014004,10,3,2,360,4.5s,250Hz,5,1800 - BNCI2015001,13,13,2,200,5s,512Hz,2,400 - BNCI2015004,10,30,5,80,7s,256Hz,2,160 - Cho2017,53,64,2,100,3s,512Hz,1,100 - Lee2019_MI,55,62,2,100,4s,1000Hz,2,200 - MunichMI,10,128,2,150,7s,500Hz,1,150 - Schirrmeister2017,14,128,4,120,4s,500Hz,1,120 - Ofner2017,15,61,7,60,3s,512Hz,1,60 - PhysionetMI,109,64,4,23,3s,160Hz,1,23 - Shin2017A,29,30,2,30,10s,200Hz,3,90 - Shin2017B,29,30,2,30,10s,200Hz,3,90 - Weibo2014,10,60,7,80,4s,200Hz,1,80 - Zhou2016,4,14,3,160,5s,250Hz,3,480 + AlexMI,8,16,3,20,3s,512Hz,1,1,480 + BNCI2014001,9,22,4,144,4s,250Hz,2,6,62208 + BNCI2014002,14,15,2,80,5s,512Hz,1,8,17920 + BNCI2014004,9,3,2,360,4.5s,250Hz,5,1,32400 + BNCI2015001,12,13,2,200,5s,512Hz,3,1,14400 + 
BNCI2015004,9,30,5,80,7s,256Hz,2,1,7200 + Cho2017,52,64,2,100,3s,512Hz,1,1,9800 + Lee2019_MI,55,62,2,100,4s,1000Hz,2,1,11000 + MunichMI,10,128,2,150,7s,500Hz,1,1,3000 + Schirrmeister2017,14,128,4,120,4s,500Hz,1,2,13440 + Ofner2017,15,61,7,60,3s,512Hz,1,10,63000 + PhysionetMI,109,64,4,23,3s,160Hz,1,1,69760 + Shin2017A,29,30,2,30,10s,200Hz,3,1,5220 + Shin2017B,29,30,2,30,10s,200Hz,3,1,5220 + Weibo2014,10,60,7,80,4s,200Hz,1,1,5600 + Zhou2016,4,14,3,160,5s,250Hz,3,2,11496 P300/ERP ====================== @@ -49,10 +49,10 @@ P300/ERP BNCI2015003, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 bi2012, 25, 16, 6140 NT / 128 T, 1s, 512Hz, 2 bi2013a, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 - bi2014a, 71, 16, , 1s, 512Hz, up to 3 - bi2014b, 38, 32, , 1s, 512Hz, 3 - bi2015a, 50, 32, , 1s, 512Hz, 3 - bi2015b, 44, 32, , 1s, 512Hz, 2 + bi2014a, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 + bi2014b, 37, 32, 200 NT / 40 T, 1s, 512Hz, 3 + bi2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 + bi2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 2 VirtualReality, 24, 16, 600 NT / 120 T, 1s, 512Hz, 2 Huebner2017, 13, 31, , 0.9s, 1000Hz, 1 Huebner2018, 12, 31, , 0.9s, 1000Hz, 1 @@ -68,13 +68,13 @@ SSVEP :header: Dataset, #Subj, #Chan, #Classes, #Trials / class, Trials length, Sampling rate, #Sessions :class: sortable - Lee2019_SSVEP,24,16,4,25,1s,1000Hz,1 + Lee2019_SSVEP,54,16,4,25,1s,1000Hz,1 SSVEPExo,12,8,4,16,2s,256Hz,1 MAMEM1,10,256,5,12-15,3s,250Hz,1 MAMEM2,10,256,5,20-30,3s,250Hz,1 MAMEM3,10,14,4,20-30,3s,128Hz,1 Nakanishi2015,9,8,12,15,4.15s,256Hz,1 - Wang2016,32,62,40,6,5s,250Hz,1 + Wang2016,34,62,40,6,5s,250Hz,1 diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 5880c89bf..b08689aaf 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -18,6 +18,8 @@ Develop branch Enhancements ~~~~~~~~~~~~ +- Adding new script to get the meta information of the datasets (:gh:`389` by `Bruno Aristimunha`_) +- Fixing the dataset description based on the 
meta information (:gh:`389` by `Bruno Aristimunha`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index ae3e9c667..d4e35fbd2 100644 --- a/moabb/datasets/Lee2019.py +++ b/moabb/datasets/Lee2019.py @@ -395,7 +395,7 @@ class Lee2019_SSVEP(Lee2019): ============= ======= ======= ========== ================= =============== =============== =========== Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions ============= ======= ======= ========== ================= =============== =============== =========== - Lee2019_SSVEP 24 16 4 25 1s 1000Hz 1 + Lee2019_SSVEP 54 16 4 25 1s 1000Hz 1 ============= ======= ======= ========== ================= =============== =============== =========== Dataset from Lee et al 2019 [1]_. diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index 81a3b40ce..51b4bdbb9 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -686,7 +686,7 @@ class BNCI2014001(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014001 10 22 4 144 4s 250Hz 2 + BNCI2014001 9 22 4 144 4s 250Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset IIa from BCI Competition 4 [1]_. 
@@ -750,7 +750,7 @@ class BNCI2014002(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014002 15 15 2 80 5s 512Hz 1 + BNCI2014002 14 15 2 80 5s 512Hz 1 =========== ======= ======= ========== ================= ============ =============== =========== Motor Imagery Dataset from [1]_. @@ -813,7 +813,7 @@ class BNCI2014004(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014004 10 3 2 360 4.5s 250Hz 5 + BNCI2014004 9 3 2 360 4.5s 250Hz 5 =========== ======= ======= ========== ================= ============ =============== =========== Dataset B from BCI Competition 2008. @@ -1030,7 +1030,7 @@ class BNCI2015001(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015001 13 13 2 200 5s 512Hz 2 + BNCI2015001 12 13 2 200 5s 512Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. 
@@ -1131,7 +1131,7 @@ class BNCI2015004(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015004 10 30 5 80 7s 256Hz 2 + BNCI2015004 9 30 5 80 7s 256Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index 42d01ee95..647281287 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -576,7 +576,7 @@ class bi2014a(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014a 71 16 5 NT x 1 T 1s 512Hz up to 3 + bi2014a 64 16 5 NT x 1 T 1s 512Hz up to 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 71 subjects @@ -634,7 +634,7 @@ class bi2014b(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014b 38 32 5 NT x 1 T 1s 512Hz 3 + bi2014b 37 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 38 subjects playing in @@ -693,7 +693,7 @@ class bi2015a(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan 
#Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2015a 50 32 5 NT x 1 T 1s 512Hz 3 + bi2015a 43 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings diff --git a/moabb/datasets/gigadb.py b/moabb/datasets/gigadb.py index cd1002f53..cccaa015e 100644 --- a/moabb/datasets/gigadb.py +++ b/moabb/datasets/gigadb.py @@ -27,7 +27,7 @@ class Cho2017(BaseDataset): ======= ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions ======= ======= ======= ========== ================= ============ =============== =========== - Cho2017 53 64 2 100 3s 512Hz 1 + Cho2017 52 64 2 100 3s 512Hz 1 ======= ======= ======= ========== ================= ============ =============== =========== Dataset from the paper [1]_. diff --git a/moabb/datasets/ssvep_wang.py b/moabb/datasets/ssvep_wang.py index cc114ed86..8c25e4332 100644 --- a/moabb/datasets/ssvep_wang.py +++ b/moabb/datasets/ssvep_wang.py @@ -30,7 +30,7 @@ class Wang2016(BaseDataset): ======== ======= ======= ========== ================= =============== =============== =========== Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions ======== ======= ======= ========== ================= =============== =============== =========== - Wang2016 32 62 40 6 5s 250Hz 1 + Wang2016 34 62 40 6 5s 250Hz 1 ======== ======= ======= ========== ================= =============== =============== =========== Dataset from [1]_. 
diff --git a/scripts/generating_metainfo.py b/scripts/generating_metainfo.py
new file mode 100644
index 000000000..5cbf44765
--- /dev/null
+++ b/scripts/generating_metainfo.py
@@ -0,0 +1,248 @@
+"""Generate CSV tables with the meta-information of the MOABB datasets.
+
+For every dataset of the MotorImagery, SSVEP and P300 paradigms, the script
+loads the first subject, extracts a few descriptive numbers and caches them
+in ``<mne_data>/../metainfo/metainfo_<dataset>.csv``. One summary table per
+paradigm is then written next to the per-dataset files.
+"""
+from argparse import ArgumentParser
+from pathlib import Path
+
+import mne
+import numpy as np
+import pandas as pd
+
+import moabb
+from moabb.datasets.utils import dataset_search
+from moabb.utils import set_download_dir
+
+
+columns_name = [
+    "Dataset",
+    "#Subj",
+    "#Chan",
+    "#Classes",
+    "trials/events",
+    "Window Size (s)",
+    "Freq (Hz)",
+    "#Session",
+    "#Runs",
+    "Total_trials",
+]
+
+
+def parser_init():
+    """Build the command-line parser of the script.
+
+    Returns
+    -------
+    parser: ArgumentParser
+        Parser exposing the ``--mne_data`` option (default: ``~/mne_data``).
+    """
+    parser = ArgumentParser(description="Getting the meta-information script for MOABB")
+
+    parser.add_argument(
+        "-mne_p",
+        "--mne_data",
+        dest="mne_data",
+        default=Path.home() / "mne_data",
+        type=Path,
+        help="Folder where to save and load the datasets with mne structure.",
+    )
+
+    return parser
+
+
+def process_trial_freq(trials_per_events, prdgm):
+    """Summarise the number of trials per event as a string.
+
+    Parameters
+    ----------
+    trials_per_events: dict
+        Number of trials for each event, as returned by ``mne.count_events``.
+    prdgm: str
+        Paradigm name: "imagery", "ssvep" or "p300".
+
+    Returns
+    -------
+    trial_freq: str
+        For "imagery" and "ssvep", the median number of trials per event.
+        For "p300", the largest and smallest counts, formatted like the P300
+        table of docs/source/dataset_summary.rst ("<max> NT / <min> T").
+
+    Raises
+    ------
+    ValueError
+        If ``prdgm`` is none of the names above.
+    """
+    class_per_trial = list(trials_per_events.values())
+
+    if prdgm in ("imagery", "ssvep"):
+        return f"{int(np.median(class_per_trial))}"
+    if prdgm == "p300":
+        # The most frequent event is reported as non-target (NT) and the
+        # least frequent one as target (T).
+        not_target = max(class_per_trial)
+        target = min(class_per_trial)
+        return f"{not_target} NT / {target} T"
+    raise ValueError(f"Unknown paradigm name: {prdgm!r}")
+
+
+def get_meta_info(dataset, dataset_name, paradigm, prdgm_name):
+    """Get the meta-information of a dataset.
+
+    Only the first subject of ``dataset.subject_list`` is loaded; the totals
+    are extrapolated from it.
+
+    Parameters
+    ----------
+    dataset: BaseDataset
+        Dataset object
+    dataset_name: str
+        Dataset name
+    paradigm: BaseParadigm
+        Paradigm object to process the dataset
+    prdgm_name: str
+        Paradigm name
+
+    Returns
+    -------
+    info_dataset: pd.Series
+        One value per entry of ``columns_name``, in the same order.
+    """
+    subjects = len(dataset.subject_list)
+    session = dataset.n_sessions
+
+    # Load the first listed subject rather than assuming a subject 1 exists.
+    first_subject = dataset.subject_list[0]
+    X, _, metadata = paradigm.get_data(
+        dataset, [first_subject], return_epochs=True
+    )
+
+    sfreq = int(X.info["sfreq"])
+    nchan = X.info["nchan"]
+    runs = len(metadata["run"].unique())
+    classes = len(X.event_id)
+    epoch_size = X.tmax - X.tmin
+
+    trials_per_events = mne.count_events(X.events)
+    total_trials = int(sum(trials_per_events.values()))
+    trial_class = process_trial_freq(trials_per_events, prdgm_name)
+
+    # NOTE(review): ``total_trials`` counts every epoch returned for the
+    # loaded subject. If that already covers all sessions and runs, the
+    # product below over-counts -- confirm against ``get_data``.
+    info_dataset = pd.Series(
+        [
+            dataset_name,
+            subjects,
+            nchan,
+            classes,
+            trial_class,
+            epoch_size,
+            sfreq,
+            session,
+            runs,
+            session * runs * total_trials * subjects,
+        ],
+        index=columns_name,
+    )
+
+    return info_dataset
+
+
+def _read_cached_info(dataset_path):
+    """Load a per-dataset Series previously saved with ``Series.to_csv``."""
+    # The file has the labels in its first column and the values in the
+    # second one; select the value column to get a Series back.
+    return pd.read_csv(dataset_path, index_col=0).iloc[:, 0]
+
+
+def _compute_info(dataset, dataset_name, paradigm, prdgm_name):
+    """Compute the meta-information, or return None if every attempt fails.
+
+    Datasets of the "imagery" paradigm that fail with ``paradigm`` are
+    retried once with ``LeftRightImagery``.
+    """
+    try:
+        return get_meta_info(dataset, dataset_name, paradigm, prdgm_name)
+    except Exception as ex:
+        # Best effort: a failing dataset is reported and skipped so the
+        # remaining ones are still processed.
+        print(f"Error with {dataset} with {prdgm_name} paradigm", end=" ")
+        print(f"Error: {ex}")
+
+    if prdgm_name != "imagery":
+        return None
+
+    print("Trying with the LeftRightImagery paradigm")
+    prdgm2 = moabb.paradigms.LeftRightImagery()
+    try:
+        return get_meta_info(dataset, dataset_name, prdgm2, prdgm_name)
+    except Exception as ex:
+        print(f"Error with {dataset} with {prdgm_name} paradigm", end=" ")
+        print(f"Error: {ex}")
+    return None
+
+
+def main():
+    """Collect the meta-information of every dataset and save it as CSV."""
+    mne.set_log_level(False)
+
+    options = parser_init().parse_args()
+    mne_path = Path(options.mne_data)
+    set_download_dir(mne_path)
+
+    # ``to_csv`` does not create missing folders.
+    metainfo_dir = mne_path.parent / "metainfo"
+    metainfo_dir.mkdir(parents=True, exist_ok=True)
+
+    paradigms = {
+        "imagery": moabb.paradigms.MotorImagery(),
+        "ssvep": moabb.paradigms.SSVEP(),
+        "p300": moabb.paradigms.P300(),
+    }
+
+    for prdgm_name, paradigm in paradigms.items():
+        metainfo = []
+        for dataset in dataset_search(paradigm=prdgm_name):
+            dataset_name = type(dataset).__name__
+            dataset_path = metainfo_dir / f"metainfo_{dataset_name}.csv"
+
+            if dataset_path.exists():
+                print(f"Loading the meta information from {dataset_path}")
+                metainfo.append(_read_cached_info(dataset_path))
+                continue
+
+            print(
+                "Trying to get the meta information from the "
+                f"dataset {dataset} with {prdgm_name}"
+            )
+            info_dataset = _compute_info(dataset, dataset_name, paradigm, prdgm_name)
+            if info_dataset is None:
+                continue
+
+            print(
+                "Saving the meta information for the dataset in the file: ",
+                dataset_path,
+            )
+            info_dataset.to_csv(dataset_path)
+            metainfo.append(info_dataset)
+
+        if not metainfo:
+            print(f"No meta information collected for the paradigm {prdgm_name}")
+            continue
+
+        # One row per dataset; select the columns by label so cached and
+        # freshly computed entries line up.
+        paradigm_df = pd.concat(metainfo, axis=1, ignore_index=True).T
+        paradigm_df = paradigm_df.reindex(columns=columns_name)
+
+        # Named after the paradigm so it cannot overwrite a dataset file.
+        paradigm_path = metainfo_dir / f"metainfo_{prdgm_name}.csv"
+        print(f"Saving the meta information for the paradigm {paradigm_path}")
+        paradigm_df.to_csv(paradigm_path, index=False)
+
+
+if __name__ == "__main__":
+    main()