In [1]:
import os
import sys
import mne
import h5py
import numpy as np
from math import ceil
# from sklearn.model_selection import train_test_split
sys.path.append("..")

In [2]:
dataset_file = "comprehensive_dataset.h5"
data_dir = os.path.join('..', '..', 'data', 'all-joined-1')
preprocessed_data_dir = os.path.join(data_dir, 'eeg', 'preprocessed')
preprocessed_files = os.listdir(preprocessed_data_dir)

In [4]:
raw = mne.io.read_raw_fif(os.path.join(preprocessed_data_dir, preprocessed_files[0]), preload=True)
list(raw.ch_names)

Opening raw data file ..\..\data\all-joined-1\eeg\preprocessed\subj01_session1_eeg.fif...
    Range : 1121 ... 1777926 =      2.189 ...  3472.512 secs
Ready.
Reading 0 ... 1776805  =      0.000 ...  3470.322 secs...


['Fp1',
 'AF7',
 'AF3',
 'F1',
 'F3',
 'F5',
 'F7',
 'FT7',
 'FC5',
 'FC3',
 'FC1',
 'C1',
 'C3',
 'C5',
 'T7',
 'TP7',
 'CP5',
 'CP3',
 'CP1',
 'P1',
 'P3',
 'P5',
 'P7',
 'P9',
 'PO7',
 'PO3',
 'O1',
 'Iz',
 'Oz',
 'POz',
 'Pz',
 'CPz',
 'Fpz',
 'Fp2',
 'AF8',
 'AF4',
 'AFz',
 'Fz',
 'F2',
 'F4',
 'F6',
 'F8',
 'FT8',
 'FC6',
 'FC4',
 'FC2',
 'FCz',
 'Cz',
 'C2',
 'C4',
 'C6',
 'T8',
 'TP8',
 'CP6',
 'CP4',
 'CP2',
 'P2',
 'P4',
 'P6',
 'P8',
 'P10',
 'PO8',
 'PO4',
 'O2',
 'Status']

In [3]:
if not os.path.exists(os.path.join(data_dir, dataset_file)):
    with h5py.File(os.path.join(data_dir, dataset_file), 'w') as f:
        pass

In [4]:
first_raw = mne.io.read_raw_fif(os.path.join(preprocessed_data_dir, preprocessed_files[0]), preload=True)
first_raw.drop_channels(['Status'])
sfreq = first_raw.info['sfreq']
ch_names = first_raw.info['ch_names']

epoch_config = [
    { 'mode': 'fixed_length_event', 'durations': [60, 30, 10]  },
    { 'mode': 'evoked_event', 'duration_before': 0.05, 'duration_after': 0.6 },
]

with h5py.File(os.path.join(data_dir, dataset_file), 'r+') as f:

    f.create_dataset('sfreq', data=sfreq)
    dt = h5py.special_dtype(vlen=str)
    f.create_dataset('ch_names', data=np.array(ch_names, dtype=dt))

    for config in epoch_config:
            
        if config['mode'] == 'fixed_length_event':
            for dur in config['durations']:
                f.create_dataset(
                    f'all_{dur}s_epochs',
                    shape=(0, len(ch_names), int(sfreq * dur)),
                    maxshape=(None, len(ch_names), int(sfreq * dur)),
                    dtype=np.float32
                )
                f.create_dataset(
                    f'all_{dur}s_epochs_metadata', 
                    shape=(0, 3),  # subject, session, sample_number
                    maxshape=(None, 3),
                    dtype=np.int32
                )
                    
        elif config['mode'] == 'evoked_event':
                timesteps = int(ceil(sfreq * (config['duration_before'] + config['duration_after'])))
                num_channels = len(ch_names)
                f.create_dataset(
                    'all_evoked_event_epochs',
                    shape=(0, num_channels, timesteps),
                    maxshape=(None, num_channels, timesteps),
                    dtype=np.float32
                )
                f.create_dataset(
                    'all_evoked_event_epochs_metadata', 
                    shape=(0, 4),  # subject, session, sample_number, evoked_event_id (coco train image)
                    maxshape=(None, 4),
                    dtype=np.int32
                )

Opening raw data file ..\..\data\all-joined-1\eeg\preprocessed\subj01_session1_eeg.fif...
    Range : 1121 ... 1777926 =      2.189 ...  3472.512 secs
Ready.
Reading 0 ... 1776805  =      0.000 ...  3470.322 secs...


In [5]:
for file in preprocessed_files:
    subject = file[5:6]
    session = file[14:15]

    raw = mne.io.read_raw_fif(os.path.join(preprocessed_data_dir, file), preload=True)
    
    with h5py.File(os.path.join(data_dir, dataset_file), 'r+') as f:

        for config in epoch_config:
            
            if config['mode'] == 'fixed_length_event':

                for dur in config['durations']:
                    epochs = mne.make_fixed_length_epochs(raw, duration=dur, preload=True)
                    epochs.drop_channels(['Status'])

                    data = epochs.get_data() # (batch size, channels, timesteps)
                    sample_numbers = epochs.events[:, 0] # (batch size, channels, timesteps)
                    
                    metadata = np.zeros((data.shape[0], 3), dtype=np.int32)
                    metadata[:, 0] = subject
                    metadata[:, 1] = session
                    metadata[:, 2] = sample_numbers

                    current_size = f[f'all_{dur}s_epochs'].shape[0]
                    new_size = current_size + data.shape[0]
                    f[f'all_{dur}s_epochs'].resize(new_size, axis=0)
                    f[f'all_{dur}s_epochs_metadata'].resize(new_size, axis=0)

                    f[f'all_{dur}s_epochs'][current_size:new_size] = data
                    f[f'all_{dur}s_epochs_metadata'][current_size:new_size] = metadata
                    
            if config['mode'] == 'evoked_event':

                    timesteps = int(ceil(sfreq * (config['duration_before'] + config['duration_after'])))
                    
                    evoked_events = mne.find_events(raw)
                    epochs = mne.Epochs(raw, evoked_events, tmin=-config['duration_before'], tmax=config['duration_after']+0.01, preload=True)
                    epochs.drop_channels(['Status'])

                    data = epochs.get_data()[:, :, :timesteps] # (batch size, channels, timesteps (forced))
                    sample_numbers = evoked_events[:, 0]
                    evoked_event_ids = evoked_events[:, -1]
                    
                    n_epochs = data.shape[0]
                    metadata = np.zeros((n_epochs, 4), dtype=np.int32)
                    metadata[:, 0] = subject
                    metadata[:, 1] = session
                    metadata[:, 2] = sample_numbers[:n_epochs]
                    metadata[:, 3] = evoked_event_ids[:n_epochs]

                    current_size = f['all_evoked_event_epochs'].shape[0]
                    new_size = current_size + data.shape[0]
                    f['all_evoked_event_epochs'].resize(new_size, axis=0)
                    f['all_evoked_event_epochs_metadata'].resize(new_size, axis=0)

                    f['all_evoked_event_epochs'][current_size:new_size] = data
                    f['all_evoked_event_epochs_metadata'][current_size:new_size] = metadata

Opening raw data file ..\..\data\all-joined-1\eeg\preprocessed\subj01_session1_eeg.fif...
    Range : 1121 ... 1777926 =      2.189 ...  3472.512 secs
Ready.
Reading 0 ... 1776805  =      0.000 ...  3470.322 secs...
Not setting metadata
57 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 57 events and 30720 original time points ...
0 bad epochs dropped
Not setting metadata
115 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 115 events and 15360 original time points ...
0 bad epochs dropped
Not setting metadata
347 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 347 events and 5120 original time points ...
0 bad epochs dropped
3839 events found on stim channel Status
Event IDs: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  

In [None]:
# test_size = 0.2
# random_states = [97, 42, 56, 35, 68]

# with h5py.File(os.path.join(data_dir, dataset_file), 'r+') as f:

#       for i in range(len(random_states)):

#             for config in epoch_config:
            
#                   if config['mode'] == 'fixed_length_event':
                  
#                         for dur in config['durations']:
#                               epochs_dataset_name = f'all_{dur}s_epochs'
#                               epochs_shape = f[epochs_dataset_name].shape
#                               metadata_dataset_name = f'all_{dur}s_epochs_metadata'
#                               metadata_shape = f[metadata_dataset_name].shape
#                               indices = np.arange(epochs_shape[0])
                                          
#                               train_indices, test_indices = train_test_split(indices, test_size=test_size, random_state=random_states[i])

#                               train_epochs = f.create_dataset(
#                                     f'train_{i+1}_{dur}s_epochs',
#                                     shape=(len(train_indices), epochs_shape[1], epochs_shape[2]),
#                                     dtype=f[epochs_dataset_name].dtype
#                               )
#                               test_epochs = f.create_dataset(
#                                     f'test_{i+1}_{dur}s_epochs',
#                                     shape=(len(test_indices), epochs_shape[1], epochs_shape[2]),
#                                     dtype=f[epochs_dataset_name].dtype
#                               )
#                               train_metadata = f.create_dataset(
#                                     f'train_{i+1}_{dur}s_epochs_metadata',
#                                     shape=(len(train_indices), metadata_shape[1]),
#                                     dtype=f[metadata_dataset_name].dtype
#                               )
#                               test_metadata = f.create_dataset(
#                                     f'test_{i+1}_{dur}s_epochs_metadata',
#                                     shape=(len(test_indices), metadata_shape[1]),
#                                     dtype=f[metadata_dataset_name].dtype
#                               )

#                               for j, k in enumerate(train_indices):
#                                     train_epochs[j] = f[epochs_dataset_name][k]
#                                     train_metadata[j] = f[metadata_dataset_name][k]
                              
#                               for j, k in enumerate(test_indices):
#                                     test_epochs[j] = f[epochs_dataset_name][k]
#                                     test_metadata[j] = f[metadata_dataset_name][k]
                  
#                   elif config['mode'] == 'evoked_event':

#                               epochs_dataset_name = f'all_evoked_event_epochs'
#                               epochs_shape = f[epochs_dataset_name].shape
#                               metadata_dataset_name = f'all_evoked_event_epochs_metadata'
#                               metadata_shape = f[metadata_dataset_name].shape
#                               indices = np.arange(epochs_shape[0])
                                          
#                               train_indices, test_indices = train_test_split(indices, test_size=test_size, random_state=random_states[i])

#                               train_epochs = f.create_dataset(
#                                     f'train_{i+1}_evoked_event_epochs',
#                                     shape=(len(train_indices), epochs_shape[1], epochs_shape[2]),
#                                     dtype=f[epochs_dataset_name].dtype
#                               )
#                               test_epochs = f.create_dataset(
#                                     f'test_{i+1}_evoked_event_epochs',
#                                     shape=(len(test_indices), epochs_shape[1], epochs_shape[2]),
#                                     dtype=f[epochs_dataset_name].dtype
#                               )
#                               train_metadata = f.create_dataset(
#                                     f'train_{i+1}_evoked_event_epochs_metadata',
#                                     shape=(len(train_indices), metadata_shape[1]),
#                                     dtype=f[metadata_dataset_name].dtype
#                               )
#                               test_metadata = f.create_dataset(
#                                     f'test_{i+1}_evoked_event_epochs_metadata',
#                                     shape=(len(test_indices), metadata_shape[1]),
#                                     dtype=f[metadata_dataset_name].dtype
#                               )

#                               for j, k in enumerate(train_indices):
#                                     train_epochs[j] = f[epochs_dataset_name][k]
#                                     train_metadata[j] = f[metadata_dataset_name][k]
                              
#                               for j, k in enumerate(test_indices):
#                                     test_epochs[j] = f[epochs_dataset_name][k]
#                                     test_metadata[j] = f[metadata_dataset_name][k]

In [None]:
with h5py.File(os.path.join(data_dir, dataset_file), 'r+') as f:
    print(f['sfreq'][()])
    print()
    print([ch.decode('utf-8') for ch in f['ch_names'][()]])
    print()
    print(f['all_60s_epochs'].shape, f['all_60s_epochs_metadata'].shape)
    print(f['all_30s_epochs'].shape, f['all_30s_epochs_metadata'].shape)
    print(f['all_10s_epochs'].shape, f['all_10s_epochs_metadata'].shape)
    print(f['all_evoked_event_epochs'].shape, f['all_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['train_1_60s_epochs'].shape, f['train_1_60s_epochs_metadata'].shape)
    # print(f['train_1_30s_epochs'].shape, f['train_1_30s_epochs_metadata'].shape)
    # print(f['train_1_10s_epochs'].shape, f['train_1_10s_epochs_metadata'].shape)
    # print(f['train_1_evoked_event_epochs'].shape, f['train_1_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['train_2_60s_epochs'].shape, f['train_2_60s_epochs_metadata'].shape)
    # print(f['train_2_30s_epochs'].shape, f['train_2_30s_epochs_metadata'].shape)
    # print(f['train_2_10s_epochs'].shape, f['train_2_10s_epochs_metadata'].shape)
    # print(f['train_2_evoked_event_epochs'].shape, f['train_2_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['train_3_60s_epochs'].shape, f['train_3_60s_epochs_metadata'].shape)
    # print(f['train_3_30s_epochs'].shape, f['train_3_30s_epochs_metadata'].shape)
    # print(f['train_3_10s_epochs'].shape, f['train_3_10s_epochs_metadata'].shape)
    # print(f['train_3_evoked_event_epochs'].shape, f['train_3_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['train_4_60s_epochs'].shape, f['train_4_60s_epochs_metadata'].shape)
    # print(f['train_4_30s_epochs'].shape, f['train_4_30s_epochs_metadata'].shape)
    # print(f['train_4_10s_epochs'].shape, f['train_4_10s_epochs_metadata'].shape)
    # print(f['train_4_evoked_event_epochs'].shape, f['train_4_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['train_5_60s_epochs'].shape, f['train_5_60s_epochs_metadata'].shape)
    # print(f['train_5_30s_epochs'].shape, f['train_5_30s_epochs_metadata'].shape)
    # print(f['train_5_10s_epochs'].shape, f['train_5_10s_epochs_metadata'].shape)
    # print(f['train_5_evoked_event_epochs'].shape, f['train_5_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['test_1_60s_epochs'].shape, f['test_1_60s_epochs_metadata'].shape)
    # print(f['test_1_30s_epochs'].shape, f['test_1_30s_epochs_metadata'].shape)
    # print(f['test_1_10s_epochs'].shape, f['test_1_10s_epochs_metadata'].shape)
    # print(f['test_1_evoked_event_epochs'].shape, f['test_1_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['test_2_60s_epochs'].shape, f['test_2_60s_epochs_metadata'].shape)
    # print(f['test_2_30s_epochs'].shape, f['test_2_30s_epochs_metadata'].shape)
    # print(f['test_2_10s_epochs'].shape, f['test_2_10s_epochs_metadata'].shape)
    # print(f['test_2_evoked_event_epochs'].shape, f['test_2_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['test_3_60s_epochs'].shape, f['test_3_60s_epochs_metadata'].shape)
    # print(f['test_3_30s_epochs'].shape, f['test_3_30s_epochs_metadata'].shape)
    # print(f['test_3_10s_epochs'].shape, f['test_3_10s_epochs_metadata'].shape)
    # print(f['test_3_evoked_event_epochs'].shape, f['test_3_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['test_4_60s_epochs'].shape, f['test_4_60s_epochs_metadata'].shape)
    # print(f['test_4_30s_epochs'].shape, f['test_4_30s_epochs_metadata'].shape)
    # print(f['test_4_10s_epochs'].shape, f['test_4_10s_epochs_metadata'].shape)
    # print(f['test_4_evoked_event_epochs'].shape, f['test_4_evoked_event_epochs_metadata'].shape)
    # print()
    # print(f['test_5_60s_epochs'].shape, f['test_5_60s_epochs_metadata'].shape)
    # print(f['test_5_30s_epochs'].shape, f['test_5_30s_epochs_metadata'].shape)
    # print(f['test_5_10s_epochs'].shape, f['test_5_10s_epochs_metadata'].shape)
    # print(f['test_5_evoked_event_epochs'].shape, f['test_5_evoked_event_epochs_metadata'].shape)


512.0



AttributeError: 'numpy.ndarray' object has no attribute 'decode'

In [8]:
# raw = mne.io.read_raw_fif(os.path.join(data_dir, file), preload=True)

# filtered = raw.copy()
# montage = mne.channels.make_standard_montage('standard_1020')
# filtered.set_montage(montage)

# filtered.filter(l_freq=0.5, h_freq=125)
# filtered.notch_filter(freqs=60)

# ica_cleaned = filtered.copy()
# ica = mne.preprocessing.ICA(n_components=.95, random_state=97)
# ica = ica.fit(ica_cleaned)
# ica.exclude = [1]
# ica_cleaned = ica.apply(ica_cleaned)

# # fig1, fig2, fig3 = raw.plot(show=True), filtered.plot(show=True), ica_cleaned.plot(show=True)

# all_events = mne.find_events(ica_cleaned)

# first_event_time = all_events[0, 0] / ica_cleaned.info['sfreq'] - 0.05  # 50ms before first event
# last_event_time = all_events[-1, 0] / ica_cleaned.info['sfreq'] + 0.6   # 600ms after last event
# cropped = ica_cleaned.copy().crop(tmin=first_event_time, tmax=last_event_time)

In [9]:
# shapes = []
# for file in files[:1]:
#     raw = mne.io.read_raw_fif(os.path.join(data_dir, file))
#     _60s_epochs = mne.make_fixed_length_epochs(raw, duration=60, preload=True)

#     # all_events = mne.find_events(raw)
#     # _evoked_event_epochs = mne.Epochs(raw, all_events, preload=True, tmin=-0.05, tmax=0.6)

#     picks = mne.pick_types(_60s_epochs.info, eeg=True, stim=False, exclude='bads')
#     n_interpolates = np.array([1, 4, 32])
#     consensus_percs = np.linspace(0, 1.0, 11)

#     ar = AutoReject(n_interpolates, consensus_percs, picks=picks, thresh_method='random_search', random_state=42)
#     _60s_epochs = ar.fit_transform(_60s_epochs)
#     _60s_epochs.drop_channels(['Status'])
#     _60s_epochs.set_eeg_reference('average')
    
#     shapes.append((file, _60s_epochs.get_data().shape))