/
age.py
130 lines (110 loc) · 5.42 KB
/
age.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Joan Massich <mailsik@gmail.com>
#
# License: BSD Style.
import numpy as np
from ...utils import verbose
from ._utils import _fetch_one, _data_path, _on_missing, AGE_SLEEP_RECORDS
from ._utils import _check_subjects
# Public alias: expose the private ``_data_path(..)`` helper as
# ``data_path(..)`` for users of this module.
data_path = _data_path

# Default root URL for the sleep-cassette recordings; used as the default
# ``base_url`` of ``fetch_data``.
BASE_URL = (
    'https://physionet.org/physiobank/database/'
    'sleep-edfx/sleep-cassette/'
)
@verbose
def fetch_data(subjects, recording=(1, 2), path=None, force_update=False,
               update_path=None, base_url=BASE_URL, on_missing='raise',
               verbose=None):  # noqa: D301
    """Get paths to local copies of PhysioNet Polysomnography dataset files.

    This will fetch data from the publicly available subjects from PhysioNet's
    study of age effects on sleep in healthy subjects [1]_ [2]_. This
    corresponds to a subset of 153 recordings from 37 males and 41 females that
    were 25-101 years old at the time of the recordings. There are two night
    recordings per subject except for subjects 13, 36 and 52 which have one
    record missing each due to missing recording hardware.

    See more details in
    `physionet website <https://physionet.org/physiobank/database/sleep-edfx/sleep-cassette/>`_.

    Parameters
    ----------
    subjects : list of int
        The subjects to use. Can be in the range of 0-82 (inclusive), however
        the following subjects are not available: 39, 68, 69, 78 and 79.
    recording : list of int
        The night recording indices. Valid values are : [1], [2], or [1, 2].
        Defaults to both recordings. The following recordings are not
        available: recording 1 for subject 36 and 52, and recording 2 for
        subject 13.
    path : None | str
        Location of where to look for the PhysioNet data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_PHYSIONET_SLEEP_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the Polysomnography dataset
        is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, store the given path in the mne-python config for this
        dataset (see ``path``). If None, the user is prompted.
    base_url : str
        Root URL the individual record file names are fetched from.
        Defaults to the module-level ``BASE_URL``.
    on_missing : 'raise' | 'warn' | 'ignore'
        What to do if one or several recordings are not available. Valid keys
        are 'raise' | 'warn' | 'ignore'. Default is 'raise'. If on_missing
        is 'warn' it will proceed but warn, if 'ignore' it will proceed
        silently.
    %(verbose)s

    Returns
    -------
    paths : list
        List of local data paths of the given type. Each entry is a
        two-element list ``[psg_fname, hyp_fname]`` for one fetched
        recording.

    Notes
    -----
    For example, one could do:

        >>> from mne.datasets import sleep_physionet
        >>> sleep_physionet.age.fetch_data(subjects=[0])  # doctest: +SKIP

    This would download data for subject 0 if it isn't there already.

    References
    ----------
    .. [1] MS Mourtazaev, B Kemp, AH Zwinderman, HAC Kamphuisen. Age and gender
           affect different characteristics of slow waves in the sleep EEG.
           Sleep 18(7):557–564 (1995).
    .. [2] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh,
           Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. (2000)
           PhysioBank, PhysioToolkit, and PhysioNet: Components of a New
           Research Resource for Complex Physiologic Signals.
           Circulation 101(23):e215-e220

    See Also
    --------
    :func:`mne.datasets.sleep_physionet.temazepam.fetch_data`
    """  # noqa: E501
    # Load the record table: one row per file, keeping only the subject id,
    # night index, file type, checksum and file name columns.
    records = np.loadtxt(AGE_SLEEP_RECORDS,
                         skiprows=1,
                         delimiter=',',
                         usecols=(0, 1, 2, 6, 7),
                         dtype={'names': ('subject', 'record', 'type', 'sha',
                                          'fname'),
                                'formats': ('<i2', 'i1', '<S9', 'S40', '<S22')}
                         )
    # Split the table by file type using boolean masks.
    psg_records = records[records['type'] == b'PSG']
    hyp_records = records[records['type'] == b'Hypnogram']

    path = data_path(path=path, update_path=update_path)
    # Trailing arguments shared by every ``_fetch_one`` call below.
    params = [path, force_update, base_url]

    _check_subjects(
        subjects, 83, missing=[39, 68, 69, 78, 79], on_missing=on_missing)

    # Check for missing recordings
    if set(subjects) & {36, 52} and 1 in recording:
        msg = ('Requested recording 1 for subject 36 and/or 52, but it is not '
               'available in corpus.')
        _on_missing(on_missing, msg)
    if 13 in subjects and 2 in recording:
        msg = ('Requested recording 2 for subject 13, but it is not available '
               'in corpus.')
        _on_missing(on_missing, msg)

    fnames = []
    for subject in subjects:
        for idx in np.where(psg_records['subject'] == subject)[0]:
            if psg_records['record'][idx] in recording:
                psg_fname = _fetch_one(psg_records['fname'][idx].decode(),
                                       psg_records['sha'][idx].decode(),
                                       *params)
                # NOTE(review): the hypnogram row is looked up with the PSG
                # row index, which assumes both record types appear in the
                # same order in the records file -- confirm against the file.
                hyp_fname = _fetch_one(hyp_records['fname'][idx].decode(),
                                       hyp_records['sha'][idx].decode(),
                                       *params)
                fnames.append([psg_fname, hyp_fname])

    return fnames