Skip to content

Commit

Permalink
Neiry Demons P300 dataset (#156)
Browse files Browse the repository at this point in the history
* alpha version of dataset

* tap

* added references

* added url, extended description

* fixed data_path, added concat draft

* concated epochs

* fixed naming and docstring

* fixed naming

* added stim channels description to docstring

* code style fixes

* code style enhancement

* run code quality check

* fixed channels formatting to black style

* Neiry Dataset: bugfix with integer index

* style fix

* added dataset to docs generation

* fixed sessions per subject parameter

Co-authored-by: Alina-Samokhina <alina.samokhina@phystech.edu>
  • Loading branch information
v-goncharenko and Alina-Samokhina committed Mar 30, 2021
1 parent d9334f5 commit c652c77
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 36 deletions.
1 change: 1 addition & 0 deletions docs/source/datasets.rst
Expand Up @@ -21,6 +21,7 @@ Motor Imagery Datasets
BNCI2015001
BNCI2015004
Cho2017
DemonsP300
Lee2019_MI
MunichMI
Ofner2017
Expand Down
4 changes: 2 additions & 2 deletions moabb/datasets/__init__.py
Expand Up @@ -4,6 +4,7 @@
and will convert them into a MNE raw object. There are options to pool all the
different recording sessions per subject or to evaluate them separately.
"""
# flake8: noqa
from .alex_mi import AlexMI
from .bbci_eeg_fnirs import Shin2017A, Shin2017B
from .bnci import (
Expand All @@ -18,11 +19,10 @@
)
from .braininvaders import bi2013a
from .epfl import EPFLP300

# flake8: noqa
from .gigadb import Cho2017
from .Lee2019 import Lee2019_MI
from .mpi_mi import MunichMI
from .neiry import DemonsP300
from .physionet_mi import PhysionetMI
from .schirrmeister2017 import Schirrmeister2017
from .ssvep_exo import SSVEPExo
Expand Down
39 changes: 5 additions & 34 deletions moabb/datasets/epfl.py
Expand Up @@ -83,42 +83,13 @@ def _get_single_run_data(self, file_path):

# meta-info from the readme.pdf
sfreq = 2048
# fmt: off
ch_names = [
"Fp1",
"AF3",
"F7",
"F3",
"FC1",
"FC5",
"T7",
"C3",
"CP1",
"CP5",
"P7",
"P3",
"Pz",
"PO3",
"O1",
"Oz",
"O2",
"PO4",
"P4",
"P8",
"CP6",
"CP2",
"C4",
"T8",
"FC6",
"FC2",
"F4",
"F8",
"AF4",
"Fp2",
"Fz",
"Cz",
"MA1",
"MA2",
"Fp1", "AF3", "F7", "F3", "FC1", "FC5", "T7", "C3", "CP1", "CP5", "P7", "P3",
"Pz", "PO3", "O1", "Oz", "O2", "PO4", "P4", "P8", "CP6", "CP2", "C4", "T8",
"FC6", "FC2", "F4", "F8", "AF4", "Fp2", "Fz", "Cz", "MA1", "MA2",
]
# fmt: on
ch_types = ["eeg"] * 32 + ["misc"] * 2

# The last X entries are 0 for all signals. This leads to
Expand Down
188 changes: 188 additions & 0 deletions moabb/datasets/neiry.py
@@ -0,0 +1,188 @@
import zipfile
from pathlib import Path

import h5py
import numpy as np
from mne import create_info
from mne.channels import make_standard_montage
from mne.io import RawArray

from . import download as dl
from .base import BaseDataset


class DemonsP300(BaseDataset):
    """Visual P300 dataset recorded in Virtual Reality (VR) game Raccoons versus Demons.

    **Dataset Description**

    We publish dataset of visual P300 BCI performed in Virtual Reality (VR) game
    Raccoons versus Demons (RvD). Data contains rich labels incorporating
    information about the stimulus chosen, enabling us to estimate model’s
    confidence at each stimulus prediction stage.

    `target` channel contains standard P300 target/non-target labels,
    while `mult_target` channel contains multiclass labels (numbers of activated
    stimuli).

    **Participants**

    60 healthy participants (23 males) naive to BCI with mean age 28 years from
    19 to 45 y.o. took part in the study. All subjects signed informed consent and
    passed primary prerequisites on their health and condition.

    **Stimulation and EEG recording**

    The EEG was recorded with NVX-52 encephalograph (MCS, Zelenograd, Russia) at
    500 Hz. We used 8 sponge electrodes (Cz, P3, P4, PO3, POz, PO4, O1, O2).
    Stimuli were presented with HTC Vive Pro VR headset with TTL hardware sync.

    **Experimental procedure**

    Participants were asked to play the P300 BCI game in virtual reality.
    BCI was embedded into a game plot with the player posing as a forest warden.
    The player was supposed to feed animals and protect them from demons.
    Game mechanics consisted in demons jumping (visually activating),
    so the player had to concentrate on one demon (chosen freely). That produced
    a P300 response at the time of the demon jump. That was the way to trigger
    a fireball towards a demon predicted by the classifier from EEG data.

    More info can be found in [1]_ [2]_.

    References
    ----------
    .. [1] Goncharenko V., Grigoryan R., and Samokhina A. (May 12, 2020),
       Raccoons vs Demons: multiclass labeled P300 dataset,
       https://arxiv.org/abs/2005.02251
    .. [2] Goncharenko V., Grigoryan R., and Samokhina A.,
       Approaches to multiclass classification of P300 potential datasets,
       Intelligent Data Processing: Theory and Applications: Book of abstracts of
       the 13th International Conference, Moscow, 2020. — Moscow: Russian
       Academy of Sciences, 2020. — 472 p. ISBN 978-5-907366-16-9
       http://www.machinelearning.ru/wiki/images/3/31/Idp20.pdf
    .. [3] Goncharenko V., Grigoryan R., and Samokhina A.,
       P300 potentials dataset and approaches to its processing,
       Proceedings of the 63rd All-Russian Scientific Conference of MIPT,
       November 23–29, 2020. Applied Mathematics and Informatics. — Moscow:
       MIPT, 2020. — 334 p.
       ISBN 978-5-7417-0757-9
       https://mipt.ru/science/5top100/education/courseproposal/%D0%A4%D0%9F%D0%9C%D0%98%20%D1%84%D0%B8%D0%BD%D0%B0%D0%BB-compressed2.pdf
    """

    # NOTE(review): the class docstring lists electrode "POz" while this list has
    # "Pz" -- confirm against the recording setup which one is correct.
    ch_names = ["Cz", "P3", "Pz", "P4", "PO3", "PO4", "O1", "O2"]
    sampling_rate = 500.0
    url = "https://gin.g-node.org/v-goncharenko/neiry-demons/raw/master/nery_demons_dataset.zip"

    _ms_in_sec = 1000
    _hdf_path = "p300dataset"
    _ds_folder_name = "demons"

    # Builtin `int` / `bool` / `object` are used instead of the `np.int` /
    # `np.bool` / `np.object` aliases: the aliases were deprecated in NumPy 1.20
    # and removed in NumPy 1.24, and they always meant exactly these builtins.
    _act_dtype = np.dtype(
        [
            ("id", int),
            ("target", int),
            ("is_train", bool),
            ("prediction", int),
            ("sessions", object),  # list of `_session_dtype`
        ]
    )
    _session_dtype = np.dtype(
        [
            ("eeg", object),
            ("starts", object),
            ("stimuli", object),
        ]
    )

    def __init__(self):
        super().__init__(
            subjects=list(range(60)),
            sessions_per_subject=1,
            events={"Target": 1, "NonTarget": 2},
            code="Demons P300",
            interval=[0, 1],
            paradigm="p300",
        )
        # Both are filled in lazily by `data_path`.
        self.path = None
        self.subjects_filenames = None

    @staticmethod
    def _strip(session) -> tuple:
        """Strips nans (from right side of all channels) added during hdf5 packaging

        Only the first channel is inspected to find the padding length; the same
        number of trailing samples is then dropped from every channel. If the
        first channel consists solely of NaNs, an EEG array with zero samples is
        returned.

        Parameters
        ----------
        session :
            Iterable whose first item is the 2D EEG array and whose remaining
            items are passed through untouched.

        Returns:
            tuple ready to be converted to `_session_dtype`
        """
        eeg, *rest = session
        n_samples = eeg.shape[1]
        # Number of NaN samples at the right end of the first channel; falls back
        # to the full length when no valid sample exists at all.
        n_padding = next(
            (i for i, value in enumerate(eeg[0, ::-1]) if not np.isnan(value)),
            n_samples,
        )
        return (eeg[:, : n_samples - n_padding], *rest)

    @classmethod
    def read_hdf(cls, filename) -> np.ndarray:
        """Reads data from HDF file

        Every member of the `_hdf_path` group becomes one record: its items are
        stripped of NaN padding and stored under "sessions", and each of its HDF
        attributes is copied into the record field of the same name (so the
        attribute names are expected to be fields of `_act_dtype`).

        Parameters
        ----------
        filename :
            Path to the subject's HDF5 file.

        Returns:
            array of `_act_dtype`
        """
        with h5py.File(filename, "r") as hfile:
            group = hfile[cls._hdf_path]
            record = np.empty(len(group), cls._act_dtype)
            for i, act in enumerate(group.values()):
                record[i]["sessions"] = np.array(
                    [cls._strip(item) for item in act], cls._session_dtype
                )
                for name, value in act.attrs.items():
                    record[i][name] = value
        return record

    def _get_single_subject_data(self, subject: int):
        """Builds MNE raw objects for one subject.

        Each record read from the subject's file becomes one run: the EEG of all
        its sessions is concatenated in time and two extra channels are appended,
        `mult_target` (stimulus number + 1) and `target` (1 for the target
        stimulus, 2 otherwise), both being zero where no stimulus starts.

        Parameters
        ----------
        subject : int
            Subject identifier, forwarded to `data_path`.

        Returns
        -------
        dict
            ``{"session_0": {"run_<i>": RawArray, ...}}``
        """
        record = self.read_hdf(self.data_path(subject))

        info = create_info(
            self.ch_names + ["mult_target", "target"],
            self.sampling_rate,
            ["eeg"] * len(self.ch_names) + ["misc", "stim"],
        )
        montage = make_standard_montage("standard_1020")

        runs_raw = {}
        for i, act in enumerate(record):
            # target and stims are increased by 1
            # because the channel is filled with zeros by default
            target = act["target"] + 1
            run_data = []
            for eeg, starts, stims in act["sessions"]:
                # Convert stimulus onsets to sample indices (presumably `starts`
                # is stored in milliseconds, given the `_ms_in_sec` factor).
                starts = starts * self.sampling_rate / self._ms_in_sec
                starts = starts.round().astype(int)
                stims = stims + 1
                stims_channel = np.zeros(eeg.shape[1])
                target_channel = np.zeros(eeg.shape[1])

                for start, stimul in zip(starts, stims):
                    stims_channel[start] = stimul
                    target_channel[start] = 1 if stimul == target else 2

                round_data = np.vstack(
                    (eeg, stims_channel[None, :], target_channel[None, :])
                )
                run_data.append(round_data)

            raw = RawArray(np.hstack(run_data), info)
            raw.set_montage(montage)
            runs_raw[f"run_{i}"] = raw
        return {"session_0": runs_raw}

    def data_path(
        self, subject: int, path=None, force_update=False, update_path=None, verbose=None
    ):
        """Returns the path to the HDF5 file of the given subject.

        Obtains the dataset archive through `dl.data_path`, extracts it on first
        use, and picks the subject's file from the alphabetically sorted list of
        ``*.hdf5`` files. `path`, `force_update`, `update_path` and `verbose` are
        accepted for interface compatibility but are not used here.

        Raises
        ------
        ValueError
            If `subject` is not in `self.subject_list`.
        """
        if subject not in self.subject_list:
            raise ValueError("Invalid subject number")

        zip_path = Path(dl.data_path(self.url, self._ds_folder_name))
        self.path = zip_path.parent / self._ds_folder_name / zip_path.stem

        # Extract only once; later calls reuse the unpacked folder.
        if not self.path.exists():
            with zipfile.ZipFile(zip_path) as zip_file:
                zip_file.extractall(self.path.parent)

        self.subjects_filenames = sorted(self.path.glob("*.hdf5"))

        return self.subjects_filenames[subject].as_posix()

0 comments on commit c652c77

Please sign in to comment.