## Load Dataset

In [9]:
# Print the top-level keys of `eeg_data` to see which fields the loaded structure exposes.
# NOTE(review): `eeg_data` is not defined in any cell visible here; it presumably comes
# from an earlier (possibly deleted) cell -- confirm this cell survives Restart & Run All.
print(eeg_data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'setname', 'filename', 'filepath', 'subject', 'group', 'condition', 'session', 'comments', 'nbchan', 'trials', 'pnts', 'srate', 'xmin', 'xmax', 'times', 'data', 'icaact', 'icawinv', 'icasphere', 'icaweights', 'icachansind', 'chanlocs', 'urchanlocs', 'chaninfo', 'ref', 'event', 'urevent', 'eventdescription', 'epoch', 'epochdescription', 'reject', 'stats', 'specdata', 'specicaact', 'splinefile', 'icasplinefile', 'dipfit', 'history', 'saved', 'etc', 'run', 'datfile'])


In [31]:
import h5py
import mne 

# One subject's ROI file (opened as HDF5 below) and the matching EEGLAB recording.
file_roi = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\processed_setfile\\processed_0_ICA_DLtrain.set"
file_eeg = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\setfile\\0_ICA_DLtrain.set"

with h5py.File(file_roi, 'r') as f:
    if 'roi' not in f:
        # Fail loudly: otherwise ROI would stay undefined (or keep a stale value
        # from an earlier run of this cell) and later cells would misbehave.
        raise KeyError(f"No 'roi' group found in {file_roi}")
    # `[:]` reads the whole dataset into memory so it outlives the file handle.
    ROI = f['roi']['source_voxel_data'][:]
print(ROI.shape)

# preload=True makes MNE read the signal data into memory immediately.
EEG = mne.io.read_raw_eeglab(file_eeg, preload=True)
print(EEG.info)

(3, 5003, 30200)
Reading G:\共用雲端硬碟\CNElab_黎承宣&賴璁毅_EEG_ROI\A.Dataset\setfile\0_ICA_DLtrain.fdt
Reading 0 ... 77496  =      0.000 ...   302.719 secs...
<Info | 8 non-empty values
 bads: []
 ch_names: FP1, FP2, F7, F3, FZ, F4, F8, FT7, FC3, FCZ, FC4, FT8, T3, C3, ...
 chs: 30 EEG
 custom_ref_applied: False
 dig: 33 items (3 Cardinal, 30 EEG)
 highpass: 0.0 Hz
 lowpass: 128.0 Hz
 meas_date: unspecified
 nchan: 30
 projs: []
 sfreq: 256.0 Hz
>


In [None]:
import os
import numpy as np
import h5py
import mne
from torch.utils.data import Dataset

class EEGROIDataset(Dataset):
    """Paired (ROI segment, EEG segment) windows cut from per-subject files.

    For every subject found in ``roi_folder`` the ROI array is read from
    ``processed_<subject>_ICA_DLtrain.set`` (opened as HDF5) and the EEG array
    from ``<subject>_ICA_DLtrain.set`` (read with MNE). Both are cut into
    overlapping windows, and all windows of all subjects are kept in memory
    in ``self.data``.
    """

    # File-name pattern shared by the ROI and EEG files of one subject.
    _ROI_PREFIX = "processed_"
    _FILE_SUFFIX = "_ICA_DLtrain.set"

    def __init__(self, roi_folder, eeg_folder, overlap=0.5, window_size=500):
        """
        Args:
            roi_folder (str): Path to the folder containing ROI .set files.
            eeg_folder (str): Path to the folder containing EEG .set files.
            overlap (float): Fraction of overlap between consecutive windows (0 <= overlap < 1).
            window_size (int): Number of samples in each window.

        Raises:
            ValueError: If ``overlap`` is outside ``[0, 1)`` or ``window_size`` is < 1.
            KeyError: If an ROI file has no ``roi`` group.
        """
        # Validate before touching the disk so bad arguments fail fast.
        if not 0 <= overlap < 1:
            raise ValueError(f"overlap must satisfy 0 <= overlap < 1, got {overlap}")
        if window_size < 1:
            raise ValueError(f"window_size must be a positive integer, got {window_size}")

        self.roi_folder = roi_folder
        self.eeg_folder = eeg_folder
        self.overlap = overlap
        self.window_size = window_size
        self.subjects = self._get_subject_list()

        self.data = []  # Will store tuples of (ROI segment, EEG segment)
        self._prepare_dataset()

    def _get_subject_list(self):
        """Return the sorted subject IDs found in the ROI folder.

        Only files named ``processed_<subject>_ICA_DLtrain.set`` are considered;
        other ``.set`` files are ignored instead of raising or yielding a
        subject whose files do not exist. Sorting makes the sample order
        independent of the file-system listing order.
        """
        prefix, suffix = self._ROI_PREFIX, self._FILE_SUFFIX
        subjects = []
        for name in os.listdir(self.roi_folder):
            if name.startswith(prefix) and name.endswith(suffix):
                subject = name[len(prefix):-len(suffix)]
                if subject:  # skip a degenerate name with an empty subject ID
                    subjects.append(subject)
        return sorted(subjects)

    def _prepare_dataset(self):
        """Reads and processes data for all subjects.

        Raises:
            KeyError: If an ROI file does not contain a ``roi`` group.
            AssertionError: If the ROI or EEG array has an unexpected shape.
        """
        for subject in self.subjects:
            roi_path = os.path.join(self.roi_folder, f"processed_{subject}_ICA_DLtrain.set")
            eeg_path = os.path.join(self.eeg_folder, f"{subject}_ICA_DLtrain.set")

            # Load ROI data
            with h5py.File(roi_path, 'r') as f:
                if 'roi' not in f:
                    # Without this check the previous subject's ROI array would be
                    # silently reused (or the name would be unbound for the first subject).
                    raise KeyError(f"No 'roi' group found in {roi_path}")
                roi_data = f['roi']['source_voxel_data'][:]

            # Load EEG data
            eeg_data = mne.io.read_raw_eeglab(eeg_path, preload=True).get_data()

            # Verify dimensions
            assert roi_data.shape[0] == 3, f"Unexpected ROI shape: {roi_data.shape}"
            assert roi_data.shape[1] == 5003, f"Unexpected ROI shape: {roi_data.shape}"
            assert eeg_data.shape[0] == 30, f"Unexpected EEG shape: {eeg_data.shape}"

            # Process and overlap data
            self._process_subject_data(roi_data, eeg_data)

    def _process_subject_data(self, roi_data, eeg_data):
        """Segments and overlaps data for a single subject.

        Appends ``(roi_segment, eeg_segment)`` tuples to ``self.data``. Both
        segments use the same start index and ``self.window_size``.

        NOTE(review): the ROI array is windowed along axis 1, the axis that
        ``_prepare_dataset`` asserts to have length 5003, while the EEG array is
        windowed along its sample axis. If ROI axis 1 indexes voxels and the
        last axis is time, these pairs are not time-aligned -- confirm the
        intended axis and the sampling-rate handling before training on this.
        """
        # The small epsilon absorbs float error (e.g. 10 * (1 - 0.9) is
        # 0.9999999999999998) while keeping truncation semantics; the floor of 1
        # keeps range() from receiving a zero step.
        step = max(1, int(self.window_size * (1 - self.overlap) + 1e-9))

        for start_idx in range(0, roi_data.shape[1] - self.window_size + 1, step):
            stop_idx = start_idx + self.window_size
            roi_segment = roi_data[:, start_idx:stop_idx, :]
            eeg_segment = eeg_data[:, start_idx:stop_idx]

            # Slicing past the end yields a shorter segment; keep full windows only.
            if roi_segment.shape[1] == self.window_size and eeg_segment.shape[1] == self.window_size:
                self.data.append((roi_segment, eeg_segment))

    def __len__(self):
        """Number of stored (ROI segment, EEG segment) pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(roi_segment, eeg_segment)`` tuple stored at ``idx``."""
        return self.data[idx]

# Usage example: build the windowed dataset from the shared-drive folders.
eeg_folder = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\setfile"
roi_folder = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\processed_setfile"

window_size = 500
overlap = 0.5  # consecutive windows share half of their samples

# Constructing the dataset reads and segments every subject found in roi_folder.
dataset = EEGROIDataset(
    roi_folder,
    eeg_folder,
    overlap=overlap,
    window_size=window_size,
)
print(f"Total dataset size: {len(dataset)}")

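"""
EEG sample count / ROI last-axis length (presumably one subject per line;
the first line matches subject 0: 77497 EEG samples, ROI shape (3, 5003, 30200)):
77497/30200 = 2.5661
78879/30800 = 2.5610
77005/30000 = 2.5668
The ratio is ~2.56 in every case, so the EEG (256 Hz) and ROI arrays do not share
a sample index. TODO: confirm the ROI sampling rate (256 / 2.56 = 100 Hz?).
"""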

Reading G:\共用雲端硬碟\CNElab_黎承宣&賴璁毅_EEG_ROI\A.Dataset\setfile\92_ICA_DLtrain.fdt
Reading 0 ... 80158  =      0.000 ...   313.117 secs...
Reading G:\共用雲端硬碟\CNElab_黎承宣&賴璁毅_EEG_ROI\A.Dataset\setfile\489_ICA_DLtrain.fdt
Reading 0 ... 87111  =      0.000 ...   340.277 secs...


In [None]:
import os
import numpy as np
import h5py
import mne
from torch.utils.data import Dataset

roi_folder = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\processed_setfile"
eeg_folder = "G:\\共用雲端硬碟\\CNElab_黎承宣&賴璁毅_EEG_ROI\\A.Dataset\\setfile"
# Subject ID = second '_'-separated token of each ROI file name.
# Sorted so the order of shape_list does not depend on the directory listing order.
roi_folder_ls = sorted(f.split('_')[1] for f in os.listdir(roi_folder) if f.endswith('.set'))

# One (roi_shape, eeg_shape) entry per successfully loaded subject.
shape_list = []

for subject in roi_folder_ls:
    roi_path = os.path.join(roi_folder, f"processed_{subject}_ICA_DLtrain.set")
    eeg_path = os.path.join(eeg_folder, f"{subject}_ICA_DLtrain.set")
    # Load ROI data
    with h5py.File(roi_path, 'r') as f:
        if 'roi' not in f:
            # Skip explicitly: otherwise the previous subject's roi_data.shape would be
            # recorded next to this subject's EEG shape (or roi_data would be undefined).
            print(f"Skipping subject {subject}: no 'roi' group in {roi_path}")
            continue
        roi_data = f['roi']['source_voxel_data'][:]
    print(roi_data.shape)

    # Load EEG data
    eeg_data = mne.io.read_raw_eeglab(eeg_path, preload=True).get_data()
    print(eeg_data.shape)
    shape_list.append((roi_data.shape, eeg_data.shape))

NameError: name 'h5py' is not defined