# Data load example
The examples below show how to load the EEG data, fNIRS data, and labels.

The functions are also defined in `load_REFED.py`:
```python
from load_REFED import load_data, load_feature, load_label
```

Main functions:
- `load_data(data_path, sub_list, modality)` loads the raw or preprocessed data.
- `load_feature(data_path, sub_list, modality)` loads the extracted features.
- `load_label(data_path, sub_list, dimension)` loads the labels.

In [1]:
import os
import numpy as np
from scipy.io import loadmat, savemat
import gc

In [None]:
# Locations of the four parts of the REFED dataset, relative to the
# directory the notebook is run from. Adjust them to your local copy.
path_data = './REFED-dataset/data'  # raw recordings
path_preprocessed = './REFED-dataset/preprocessed'  # preprocessed recordings
path_feature = './REFED-dataset/features'  # extracted features
path_annotation = './REFED-dataset/annotations'  # label files

In [3]:
'''
The following functions are used to load data, features, and labels.
Also defined in `load_REFED.py`
'''

def load_data(data_path, sub_list=None, modality:list=None):
    '''
    Load EEG and fNIRS data for specified subjects and modalities.

    Every subject is a sub-folder of `data_path` holding `EEG_videos.mat`
    and/or `fNIRS_videos.mat`; each file must provide the variables
    `video_1` ... `video_15`.

    Parameters:
        data_path (str): Path to the data directory.
        sub_list (list, optional): List of subject identifiers (folder names) to load.
            If None, every sub-folder of `data_path` is loaded in sorted name order.
            Entries that are not folders under `data_path` are skipped.
        modality (list, optional): List of modalities to load ('EEG', 'fNIRS').
            None (the default) loads both.
    Returns:
        data (dict): Nested dictionary with structure data[subject][modality][video],
            where the video keys are 'v1' ... 'v15'.
    Raises:
        KeyError: If a loaded file lacks one of the `video_N` variables.
        Errors raised by `scipy.io.loadmat` (e.g. a missing .mat file) are propagated.
    '''
    # A None default avoids sharing one mutable list between calls.
    if modality is None:
        modality = ['EEG', 'fNIRS']
    # os.listdir returns entries in arbitrary order; sort for reproducible results.
    if sub_list is None:
        sub_list = sorted(os.listdir(data_path))

    data = {}
    for si in sub_list:
        # Only folders are subjects; skip stray files and unknown identifiers.
        if not os.path.isdir(os.path.join(data_path, si)):
            continue
        data[si] = {}
        for mi in ('EEG', 'fNIRS'):
            if mi not in modality:
                continue
            mat_mi = loadmat(os.path.join(data_path, si, '%s_videos.mat' % mi))
            data[si][mi] = {'v%d' % vi: mat_mi['video_%d' % vi] for vi in range(1, 16)}
            # Drop the full .mat dictionary before the next (large) file is read.
            del mat_mi
        gc.collect()
    return data



def load_feature(data_path, sub_list=None, modality:list=None):
    '''
    Load EEG and fNIRS features for specified subjects and modalities.

    Every subject is a sub-folder of `data_path` holding `EEG_videos_feature.mat`
    and/or `fNIRS_videos_feature.mat`; each file must provide the variables
    `video_1` ... `video_15`.

    Parameters:
        data_path (str): Path to the feature directory.
        sub_list (list, optional): List of subject identifiers (folder names) to load.
            If None, every sub-folder of `data_path` is loaded in sorted name order.
            Entries that are not folders under `data_path` are skipped.
        modality (list, optional): List of modalities to load ('EEG', 'fNIRS').
            None (the default) loads both.
    Returns:
        data (dict): Nested dictionary with structure data[subject][modality][video],
            where the video keys are 'v1' ... 'v15'.
    Raises:
        KeyError: If a loaded file lacks one of the `video_N` variables.
        Errors raised by `scipy.io.loadmat` (e.g. a missing .mat file) are propagated.
    '''
    # A None default avoids sharing one mutable list between calls.
    if modality is None:
        modality = ['EEG', 'fNIRS']
    # os.listdir returns entries in arbitrary order; sort for reproducible results.
    if sub_list is None:
        sub_list = sorted(os.listdir(data_path))

    data = {}
    for si in sub_list:
        # Only folders are subjects; skip stray files and unknown identifiers.
        if not os.path.isdir(os.path.join(data_path, si)):
            continue
        data[si] = {}
        for mi in ('EEG', 'fNIRS'):
            if mi not in modality:
                continue
            mat_mi = loadmat(os.path.join(data_path, si, '%s_videos_feature.mat' % mi))
            data[si][mi] = {'v%d' % vi: mat_mi['video_%d' % vi] for vi in range(1, 16)}
            # Drop the full .mat dictionary before the next file is read.
            del mat_mi
        gc.collect()
    return data



def load_label(data_path, sub_list=None, dimension:list=None):
    '''
    Load labels for specified subjects and dimensions.

    Label files are named `<subject>_label.mat` and must provide the variables
    `video_1` ... `video_15`; column 0 of each is read as Valence and column 1
    as Arousal.

    Parameters:
        data_path (str): Path to the label directory.
        sub_list (list, optional): List of subject identifiers to load. If None, load all
            `*_label.mat` files found in `data_path`, in sorted name order. An identifier
            may be given with or without the leading 's' (e.g. 1, '1' or 's1').
        dimension (list, optional): List of dimensions to load ('Valence', 'Arousal').
            None (the default) loads both.
    Returns:
        label (dict): Nested dictionary with structure label[subject][video][dimension].
            Subject keys always carry exactly one leading 's' (e.g. 's1'), video keys
            are 'v1' ... 'v15'.
    Raises:
        KeyError: If a label file lacks one of the `video_N` variables.
        Errors raised by `scipy.io.loadmat` (e.g. a missing label file) are propagated.
    '''
    suffix = '_label.mat'
    # A None default avoids sharing one mutable list between calls.
    if dimension is None:
        dimension = ['Valence', 'Arousal']

    if sub_list is None:
        # os.listdir returns entries in arbitrary order; sort for reproducible results.
        sub_file = sorted(fi for fi in os.listdir(data_path) if fi.endswith(suffix))
    else:
        sub_file = []
        for si in sub_list:
            sid = str(si)
            bare = sid[1:] if sid.startswith('s') else sid
            # The historical name ('s<id>_label.mat') is tried first, then the
            # un-prefixed variants, so both file-naming schemes are accepted.
            candidates = ['s%s%s' % (sid, suffix), sid + suffix, bare + suffix]
            existing = [ci for ci in candidates
                        if os.path.isfile(os.path.join(data_path, ci))]
            # With no match, keep the historical name so loadmat reports the miss.
            sub_file.append(existing[0] if existing else candidates[0])

    label = {}
    for fi in sub_file:
        stem = fi[:-len(suffix)]
        # Do not prepend a second 's' when the file name already carries one;
        # otherwise 's1_label.mat' would be stored under the key 'ss1'.
        si_key = stem if stem.startswith('s') else 's%s' % stem
        label_si = loadmat(os.path.join(data_path, fi))

        label[si_key] = {}
        for vi in range(1, 16):
            vi_key = 'v%d' % vi
            label[si_key][vi_key] = {}
            if 'Valence' in dimension:
                label[si_key][vi_key]['Valence'] = label_si['video_%d' % vi][:, 0]
            if 'Arousal' in dimension:
                label[si_key][vi_key]['Arousal'] = label_si['video_%d' % vi][:, 1]
    return label

### Load raw data
Loading the raw data requires enough free memory to hold all of it at once (see the estimates next to each call).

In [None]:
# Load the raw recordings one modality per call. The returned dictionaries are
# not assigned here; bind them to a name (e.g. `raw = load_data(...)`) to keep them.
load_data(path_data, modality=['fNIRS']) # 6GB memory needed
load_data(path_data, modality=['EEG'])   # 25GB memory needed

### Load processed data

In [None]:
# Load the preprocessed recordings; with no `modality` argument both EEG and
# fNIRS are loaded. The returned dictionary is not assigned here.
load_data(path_preprocessed) # 10GB memory needed

### Load features

In [None]:
# Load the extracted features for every subject folder; with no `modality`
# argument both EEG and fNIRS are loaded. The returned dictionary is not assigned here.
load_feature(path_feature)

### Load labels

In [None]:
# Load the labels from every `*_label.mat` file in the annotation directory;
# with no `dimension` argument both Valence and Arousal are loaded.
# The returned dictionary is not assigned here.
load_label(path_annotation)