-
Notifications
You must be signed in to change notification settings - Fork 166
/
sosulski2019.py
197 lines (159 loc) · 8.58 KB
/
sosulski2019.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import glob
import os
import re
import zipfile
import mne
from moabb.datasets import download as dl
from moabb.datasets.base import BaseDataset
# Base URL of the FreiDok datastreams for this dataset; per-subject archive URLs
# are built from it as f"{SPOT_PILOT_P300_URL}/FILE{n}/content".
SPOT_PILOT_P300_URL = "https://freidok.uni-freiburg.de/fedora/objects/freidok:154576/datastreams"
class Sosulski2019(BaseDataset):
    """P300 dataset from initial spot study.

    Dataset [1]_, study on spatial transfer between SOAs [2]_, actual paradigm / online optimization [3]_.

    .. admonition:: Dataset summary

        ============= ======= ======= ================= =============== =============== ===========
        Name          #Subj   #Chan   #Trials / class   Trials length   Sampling rate   #Sessions
        ============= ======= ======= ================= =============== =============== ===========
        Sosulski2019  13      31      7500 NT / 1500 T  1.2s            1000Hz          1
        ============= ======= ======= ================= =============== =============== ===========

    **Dataset description**

    This dataset contains multiple small trials of an auditory oddball paradigm. The paradigm presented two different
    sinusoidal tones. A low-pitched (500 Hz, 40 ms duration) non-target tone and a high-pitched (1000 Hz,
    40 ms duration) target tone. Subjects were instructed to attend to the high-pitched target tones and ignore the
    low-pitched tones.

    One trial (= one file) consisted of 90 tones, 15 targets and 75 non-targets. The order was pseudo-randomized in a
    way that at least two non-target tones occur between two target tones. Additionally, if you split the 90 tones of
    one trial into consecutive sets of six tones, there will always be exactly one target and five non-target tones
    in each set.

    In the first part of the experiment (run 1), each subject performed 50-70 trials with various different stimulus
    onset asynchronies (SOAs) -- i.e. the time between the onset of successive tones -- for each trial. In the second
    part (run 2), 4-5 SOAs were played, with blocks of 5 trials having the same SOA. All SOAs were in the range of 60
    ms to 600 ms. Regardless of the experiment part, after a set of five trials, subjects were given the opportunity
    to take a short break to e.g. drink etc.

    Finally, before and after each run, resting data was recorded. One minute with eyes open and one minute with eyes
    closed, i.e. in total four minutes of resting data are available for each subject.

    Data was recorded using a BrainAmp DC (BrainVision) amplifier and a 31 passive electrode EasyCap. The cap was
    placed according to the extended 10-20 electrode layout. The reference electrode was placed on the nose. Before
    recording, the cap was prepared such that impedances on all electrodes were around 20 kOhm. The EEG signal was
    recorded at 1000 Hz.

    The data contains 31 scalp channels, one EOG channel and five miscellaneous non-EEG signal channels. However,
    only scalp EEG and the EOG channel is available in all subjects. The markers in the marker file indicate the
    onset of target tones (21) and non-target tones (1).

    .. caution::

        Note that this wrapper currently only loads the second part of the experiment and uses pseudo-sessions
        to achieve the functionality to handle different conditions in MOABB. As a result, the statistical testing
        features of MOABB cannot be used for this dataset.

    References
    ----------
    .. [1] Sosulski, J., Tangermann, M.: Electroencephalogram signals recorded from 13 healthy subjects during an
           auditory oddball paradigm under different stimulus onset asynchrony conditions.
           Dataset. DOI: 10.6094/UNIFR/154576
    .. [2] Sosulski, J., Tangermann, M.: Spatial filters for auditory evoked potentials transfer between different
           experimental conditions. Graz BCI Conference. 2019.
    .. [3] Sosulski, J., Hübner, D., Klein, A., Tangermann, M.: Online Optimization of Stimulation Speed in
           an Auditory Brain-Computer Interface under Time Constraints. arXiv preprint. 2021.

    Notes
    -----
    .. versionadded:: 0.4.5
    """

    # Recording header file names encode run, trial and SOA, e.g.
    # "Oddball_Run_2_Trial_7_SOA_0.226.vhdr". Only the digits after the decimal
    # point of the SOA are captured (third group).
    _FILENAME_PATTERN = re.compile(
        r"Oddball_Run_([0-9]+)_Trial_([0-9]+)_SOA_[0-9]\.([0-9]+)\.vhdr"
    )

    def __init__(
        self,
        use_soas_as_sessions=True,
        load_soa_60=False,
        reject_non_iid=False,
        interval=None,
    ):
        """
        :param use_soas_as_sessions: 1800 epochs were recorded at different SOAs each. Depending on
            the subject between 3 and 4 (4-5 if 60 is loaded). Training classifiers on mixtures of SOAs
            rarely is useful. Setting this to True loads these as individual sessions for e.g.
            WithinSessionEvaluation.
        :param load_soa_60: whether to load SOA 60. Note that this was always recorded, but the
            recorded ERP was extremely weak (as expected).
        :param reject_non_iid: if true removes the first 6 and last 6 epochs of each trial.
        :param interval: epoch interval forwarded to the base dataset; defaults to
            ``[-0.2, 1]`` when None.
        """
        # Loader options are stored before the base-class initialiser runs.
        self.load_soa_60 = load_soa_60
        self.reject_non_iid = reject_non_iid
        self.stimulus_modality = "tone_oddball"
        self.n_channels = 31
        self.use_soas_as_sessions = use_soas_as_sessions
        # Maps the annotation descriptions found in the recordings to event names.
        self.description_map = {"Stimulus/S 21": "Target", "Stimulus/S  1": "NonTarget"}
        code = "Sosulski2019"
        interval = [-0.2, 1] if interval is None else interval
        super().__init__(
            subjects=list(range(1, 13 + 1)),
            sessions_per_subject=1,
            events=dict(Target=21, NonTarget=1),
            code=code,
            interval=interval,
            paradigm="p300",
            doi="10.6094/UNIFR/154576",
        )

    @staticmethod
    def _map_subject_to_filenumber(subject_number):
        """Return the FreiDok ``FILE<n>`` number holding the given subject's archive.

        The ordering of the uploaded files on freidok does not follow the subject
        numbering, hence this explicit lookup table.

        :param subject_number: 1-based subject number (1..13).
        :raises ValueError: if ``subject_number`` is outside the table. Without this
            check, 0 or negative numbers would silently wrap around to another
            subject's file.
        """
        mapping = (5, 2, 4, 6, 3, 1, 10, 7, 12, 9, 8, 11, 13)
        if not 1 <= subject_number <= len(mapping):
            raise ValueError(
                f"Invalid subject number {subject_number!r}; expected 1..{len(mapping)}"
            )
        return mapping[subject_number - 1]

    @staticmethod
    def filename_trial_info_extraction(filepath):
        """Parse run, trial and SOA out of a recording file name.

        :param filepath: path to a ``.vhdr`` file whose base name starts with
            ``Oddball_Run_<run>_Trial_<trial>_SOA_<d>.<ddd>.vhdr``.
        :return: dict with integer entries ``"run"``, ``"trial"`` and ``"soa"``.
            ``"soa"`` is the integer value of the digits after the decimal point
            (e.g. ``"0.060"`` gives 60).
        :raises ValueError: if the file name does not follow the expected pattern.
        """
        filename = os.path.basename(filepath)
        re_matches = Sosulski2019._FILENAME_PATTERN.match(filename)
        if re_matches is None:
            raise ValueError(
                f"Cannot extract run/trial/SOA information from file name {filename!r}"
            )
        run, trial, soa = (int(group) for group in re_matches.groups())
        return {"run": run, "trial": trial, "soa": soa}

    def _get_single_run_data(self, file_path):
        """Load one BrainVision recording and return it as an MNE raw object.

        The non-scalp channels are declared as ``misc``, the ``standard_1020``
        montage is applied and the annotation descriptions are renamed according
        to ``self.description_map``.

        :param file_path: path to the ``.vhdr`` header file of the recording.
        :return: the preloaded raw object.
        """
        non_scalp_channels = ["EOGvu", "x_EMGl", "x_GSR", "x_Respi", "x_Pulse", "x_Optic"]
        raw = mne.io.read_raw_brainvision(
            file_path, misc=non_scalp_channels, preload=True
        )
        raw.set_montage("standard_1020")
        if self.reject_non_iid:
            # Non-iid rejection: keep only annotations 7..84. Per the constructor
            # documentation this drops the first and last 6 epochs of the trial;
            # presumably annotation 0 is a non-stimulus marker -- TODO confirm.
            raw.set_annotations(raw.annotations[7:85])
        raw.annotations.rename(self.description_map)
        return raw

    def _get_single_subject_data(self, subject):
        """Return data for a single subject.

        The result is a nested dict ``{session_name: {run_name: raw}}`` built from
        the files returned by :meth:`data_path`. Files with SOA 60 are skipped
        unless ``self.load_soa_60`` is set.

        * With ``use_soas_as_sessions`` every loaded file gets its own session
          ``"<file index>soa<soa>"`` containing a single run ``"0soa<soa>"``.
        * Otherwise all files go into session ``"0"`` as runs
          ``"<file index>soa<soa>"``.

        The file index counts all files, including skipped ones, so indices are
        not necessarily contiguous.
        """
        sessions = {}
        for p_i, file_path in enumerate(self.data_path(subject)):
            soa = self.filename_trial_info_extraction(file_path)["soa"]
            if soa == 60 and not self.load_soa_60:
                continue

            file_tag = f"{p_i}soa{soa}"
            if self.use_soas_as_sessions:
                session_name, run_name = file_tag, f"0soa{soa}"
            else:
                session_name, run_name = "0", file_tag

            raw = self._get_single_run_data(file_path)
            sessions.setdefault(session_name, {})[run_name] = raw
        return sessions

    def data_path(
        self, subject, path=None, force_update=False, update_path=None, verbose=None
    ):
        """Download (if needed), unpack and list the recordings of one subject.

        :param subject: subject number; must be contained in ``self.subject_list``.
        :param path: accepted for interface compatibility.
        :param force_update: accepted for interface compatibility.
        :param update_path: accepted for interface compatibility.
        :param verbose: accepted for interface compatibility.
        :return: sorted list of paths to the ``.vhdr`` files of run 2.
        :raises ValueError: if ``subject`` is not a valid subject number.

        NOTE(review): ``path``, ``force_update``, ``update_path`` and ``verbose``
        are currently not forwarded to the download helper.
        """
        if subject not in self.subject_list:
            raise ValueError("Invalid subject number")

        # Fetch the subject's zip archive.
        file_number = self._map_subject_to_filenumber(subject)
        url = f"{SPOT_PILOT_P300_URL}/FILE{file_number}/content"
        path_zip = dl.data_dl(url, "spot")

        # The archive is unpacked next to itself; the existence of the folder
        # "subject<N>" is used as the marker that extraction already happened.
        extract_dir = os.path.dirname(path_zip)
        path_folder = os.path.join(extract_dir, f"subject{subject}")
        if not os.path.isdir(path_folder):
            print("unzip", path_zip)
            # Context manager guarantees the archive handle is closed again.
            with zipfile.ZipFile(path_zip, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

        # We only load data from the second run. The first run is a potpourri of SOAs
        subject_paths = glob.glob(os.path.join(path_folder, "*Run_2*.vhdr"))
        return sorted(subject_paths)