# Process the data as samples
Process the data as samples based on time windows by following these steps:
- Filter the data to remove noise and baseline shift.
- Resample the data to a lower frequency.
- Segment the data into fixed-size time windows.

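Each recording of shape (n_channel, time_points) is converted to samples of shape (N, n_channel, time_window), where N is the number of time windows. (fNIRS recordings carry one extra leading axis, which is preserved in each sample.)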

---

Input: Raw data in folder `REFED-dataset/data/`

Output: Processed samples to folder *`'path_preprocessed'`*

In [1]:
import os
import mne
from scipy.io import loadmat, savemat

### Process parameters

In [2]:
path_data = './REFED-dataset/data'
path_preprocessed = './REFED-dataset/preprocessed'

# Preprocessing parameters (shared by the EEG and fNIRS cells below)
para_preprocess = {
    'EEG_fs'         : 1000,       # original EEG sampling rate
    'EEG_fs_down'    : 200,        # downsampled EEG sampling rate
    'EEG_filt_band'  : [0.5, 50],  # bandpass filter range for EEG
    'EEG_filt_north' : [50],       # notch filter frequency for EEG (key spelling kept as-is)
    'fNIRS_fs'       : 1000/21,    # original fNIRS sampling rate
    'fNIRS_fs_down'  : 25,         # downsampled fNIRS sampling rate
    'fNIRS_filt_band': [0.01, 10], # bandpass filter range for fNIRS
    'window'         : 1           # time window in seconds
}

# One sub-folder per subject. os.listdir() also returns plain files
# (e.g. '.DS_Store', README) in an unspecified order, so keep only
# directories and sort them to get a stable processing order.
sub_list = sorted(
    name for name in os.listdir(path_data)
    if os.path.isdir(os.path.join(path_data, name))
)

# exist_ok avoids the check-then-create race and is a no-op on re-runs
os.makedirs(path_preprocessed, exist_ok=True)

### Process EEG data
Tip: The EEG data has already been filtered with a bandpass filter (1-50 Hz) and a notch filter (50 Hz), so the filtering step is skipped here.

In [3]:
def preprocess_EEG_sub(path_data, para_preprocess, save_path=None):
    '''
    Preprocess EEG data for one subject

    Loads 'EEG_videos.mat' from the subject folder, downsamples every
    variable whose name contains 'video_' and cuts it into non-overlapping
    time windows. Filtering is skipped here (the original filter calls are
    kept below as comments for reference).

    Input:
        path_data: str, path to the subject data folder
        para_preprocess: dict, preprocessing parameters
            (reads 'EEG_fs', 'EEG_fs_down' and 'window')
        save_path: str or None, path to save the preprocessed data, if None, not save
    Output:
        dict, video key -> array of shape (n_windows, n_channels, window_length)
    Raises:
        ValueError: if a downsampled recording is not a whole number of windows
    '''
    # each video entry: n_channels * n_times
    EEG_data = loadmat(os.path.join(path_data, 'EEG_videos.mat'))
    EEG_preprocessed = {}

    # Samples per window after downsampling. Cast to int so that a float
    # 'window' (e.g. 0.5 s) still yields a valid reshape dimension.
    win_len = int(round(para_preprocess['window'] * para_preprocess['EEG_fs_down']))

    for vi, EEG_raw in EEG_data.items(): # video keys
        if 'video_' not in vi: # skips loadmat metadata such as '__header__'
            continue

        # EEG_vi = mne.filter.filter_data(EEG_data[vi], sfreq=para_preprocess['EEG_fs'], l_freq=para_preprocess['EEG_filt_band'][0], h_freq=para_preprocess['EEG_filt_band'][1], method='iir', verbose=False)
        # EEG_vi = mne.filter.notch_filter(EEG_vi, Fs=para_preprocess['EEG_fs'], freqs=para_preprocess['EEG_filt_north'], method='iir', verbose=False)
        EEG_vi = mne.filter.resample(EEG_raw, up=para_preprocess['EEG_fs_down'], down=para_preprocess['EEG_fs'], verbose=False)

        # Fail with an explicit message instead of numpy's generic reshape error
        if EEG_vi.shape[-1] % win_len != 0:
            raise ValueError('  [error] Video %s: EEG data length %d cannot be divided by %d after downsampling!' % (vi, EEG_vi.shape[-1], win_len))

        # (n_channels, n_times) -> (n_channels, N, win_len) -> (N, n_channels, win_len)
        EEG_vi = EEG_vi.reshape(EEG_vi.shape[0], -1, win_len).transpose([1,0,2])
        print('  [info] Video %s: %s -> %s' % (vi, EEG_raw.shape, EEG_vi.shape))
        EEG_preprocessed[vi] = EEG_vi

    if save_path is not None:
        preprocessed_mat_path = os.path.join(save_path, 'EEG_videos.mat')
        savemat(preprocessed_mat_path, EEG_preprocessed)
        print('[info] Save preprocessed data to: %s' % (preprocessed_mat_path))

    # Return the result so that save_path=None is actually usable
    return EEG_preprocessed

In [4]:
# Run the EEG preprocessing for every subject and mirror the folder layout
# of path_data under path_preprocessed.
for si in sub_list: # subject IDs
    path_data_si = os.path.join(path_data, si)
    if not os.path.isdir(path_data_si):
        # a directory listing can contain stray files; they are not subjects
        print('Skip [%s]: not a subject folder' % si)
        continue

    print('Processing EEG data of subject ID.[%s]...' % si)
    path_preprocessed_si = os.path.join(path_preprocessed, si)
    # exist_ok avoids the check-then-create race and is a no-op on re-runs
    os.makedirs(path_preprocessed_si, exist_ok=True)

    preprocess_EEG_sub(path_data_si, para_preprocess, save_path=path_preprocessed_si)

Processing EEG data of subject ID.[1]...
  [info] Video video_1: (64, 134000) -> (134, 64, 200)
  [info] Video video_2: (64, 137000) -> (137, 64, 200)
  [info] Video video_3: (64, 90000) -> (90, 64, 200)
  [info] Video video_4: (64, 79000) -> (79, 64, 200)
  [info] Video video_5: (64, 135000) -> (135, 64, 200)
  [info] Video video_6: (64, 93000) -> (93, 64, 200)
  [info] Video video_7: (64, 122000) -> (122, 64, 200)
  [info] Video video_8: (64, 107000) -> (107, 64, 200)
  [info] Video video_9: (64, 70000) -> (70, 64, 200)
  [info] Video video_10: (64, 60000) -> (60, 64, 200)
  [info] Video video_11: (64, 61000) -> (61, 64, 200)
  [info] Video video_12: (64, 63000) -> (63, 64, 200)
  [info] Video video_13: (64, 111000) -> (111, 64, 200)
  [info] Video video_14: (64, 103000) -> (103, 64, 200)
  [info] Video video_15: (64, 170000) -> (170, 64, 200)
[info] Save preprocessed data to: ./REFED-dataset/preprocessed\1\EEG_videos.mat
Processing EEG data of subject ID.[10]...
  [info] Video video

### Process fNIRS data

In [5]:
def preprocess_fNIRS_sub(path_data, para_preprocess, save_path=None):
    '''
    Preprocess fNIRS data for one subject

    Loads 'fNIRS_videos.mat' from the subject folder; every variable whose
    name contains 'video_' is bandpass filtered, downsampled and cut into
    non-overlapping time windows.

    Input:
        path_data: str, path to the subject data folder
        para_preprocess: dict, preprocessing parameters
            (reads 'fNIRS_fs', 'fNIRS_fs_down', 'fNIRS_filt_band' and 'window')
        save_path: str or None, path to save the preprocessed data, if None, not save
    Output:
        dict, video key -> array of shape (n_windows, dim0, dim1, window_length)
    Raises:
        NotImplementedError: if a downsampled recording is more than one
            sample away from a whole number of windows
    '''
    # each video entry is indexed as a 3-D array with time on the last axis
    # NOTE(review): meaning of the two leading axes is not visible here - confirm
    fNIRS_data = loadmat(os.path.join(path_data, 'fNIRS_videos.mat'))
    fNIRS_preprocessed = {}

    # Samples per window after downsampling. The divisibility check below must
    # use this value (not just the sampling rate) to agree with the reshape
    # whenever 'window' is not 1.
    win_len = int(round(para_preprocess['window'] * para_preprocess['fNIRS_fs_down']))

    for vi, fNIRS_raw in fNIRS_data.items():
        if 'video_' not in vi: # skips loadmat metadata such as '__header__'
            continue

        fNIRS_vi = mne.filter.filter_data(fNIRS_raw, sfreq=para_preprocess['fNIRS_fs'], l_freq=para_preprocess['fNIRS_filt_band'][0], h_freq=para_preprocess['fNIRS_filt_band'][1], method='iir', verbose=False)
        fNIRS_vi = mne.filter.resample(fNIRS_vi, up=para_preprocess['fNIRS_fs_down'], down=para_preprocess['fNIRS_fs'], verbose=False)

        # The non-integer resampling ratio can leave one extra sample from
        # rounding; drop it. Any other remainder is unexpected.
        remainder = fNIRS_vi.shape[-1] % win_len
        if remainder == 1:
            fNIRS_vi = fNIRS_vi[:, :, :-1]
        elif remainder != 0:
            raise NotImplementedError('  [error] Video %s: fNIRS data length cannot be divided by %d after downsampling!' % (vi, win_len))

        # (d0, d1, n_times) -> (d0, d1, N, win_len) -> (N, d0, d1, win_len)
        fNIRS_vi = fNIRS_vi.reshape(fNIRS_vi.shape[0], fNIRS_vi.shape[1], -1, win_len).transpose([2,0,1,3])
        print('  [info] Video %s: %s -> %s' % (vi, fNIRS_raw.shape, fNIRS_vi.shape))
        fNIRS_preprocessed[vi] = fNIRS_vi

    if save_path is not None:
        preprocessed_mat_path = os.path.join(save_path, 'fNIRS_videos.mat')
        savemat(preprocessed_mat_path, fNIRS_preprocessed)
        print('[info] Save preprocessed data to: %s' % (preprocessed_mat_path))

    # Return the result so that save_path=None is actually usable
    return fNIRS_preprocessed

In [6]:
# Run the fNIRS preprocessing for every subject and mirror the folder layout
# of path_data under path_preprocessed.
for si in sub_list: # subject IDs
    path_data_si = os.path.join(path_data, si)
    if not os.path.isdir(path_data_si):
        # a directory listing can contain stray files; they are not subjects
        print('Skip [%s]: not a subject folder' % si)
        continue

    print('Processing fNIRS data of subject ID.[%s]...' % si)
    path_preprocessed_si = os.path.join(path_preprocessed, si)
    # exist_ok avoids the check-then-create race and is a no-op on re-runs
    os.makedirs(path_preprocessed_si, exist_ok=True)

    preprocess_fNIRS_sub(path_data_si, para_preprocess, save_path=path_preprocessed_si)

Processing fNIRS data of subject ID.[1]...


  [info] Video video_1: (6, 51, 6381) -> (134, 6, 51, 25)
  [info] Video video_2: (6, 51, 6524) -> (137, 6, 51, 25)
  [info] Video video_3: (6, 51, 4286) -> (90, 6, 51, 25)
  [info] Video video_4: (6, 51, 3762) -> (79, 6, 51, 25)
  [info] Video video_5: (6, 51, 6429) -> (135, 6, 51, 25)
  [info] Video video_6: (6, 51, 4429) -> (93, 6, 51, 25)
  [info] Video video_7: (6, 51, 5810) -> (122, 6, 51, 25)
  [info] Video video_8: (6, 51, 5096) -> (107, 6, 51, 25)
  [info] Video video_9: (6, 51, 3334) -> (70, 6, 51, 25)
  [info] Video video_10: (6, 51, 2858) -> (60, 6, 51, 25)
  [info] Video video_11: (6, 51, 2905) -> (61, 6, 51, 25)
  [info] Video video_12: (6, 51, 3001) -> (63, 6, 51, 25)
  [info] Video video_13: (6, 51, 5286) -> (111, 6, 51, 25)
  [info] Video video_14: (6, 51, 4905) -> (103, 6, 51, 25)
  [info] Video video_15: (6, 51, 8096) -> (170, 6, 51, 25)
[info] Save preprocessed data to: ./REFED-dataset/preprocessed\1\fNIRS_videos.mat
Processing fNIRS data of subject ID.[10]...
  [in