# 数据结构解读

In [1]:
import scipy.io as sio 
import h5py, os,mne
import numpy as np
# , 
# path = '/home/test/Desktop/python/EEG_data/AAD_dataset/AAD_DTU/EEG'
# file_path = os.path.join(path, 'S1_data_preproc.mat')
# data = scio.loadmat(file_path)

In [2]:
def load_mat_data(filename):
    root = '/home/test/Desktop/python/EEG_data/AAD_dataset/AAD_DTU/EEG'
    filepath = os.path.join(root, filename)
    
    # 读取 .mat 文件
    mat = sio.loadmat(filepath)
    
    eeg_data = mat['data']['eeg'][0,0]
    wavA_data = mat['data']['wavA'][0,0]
    wavB_data = mat['data']['wavB'][0,0]
    event_data = mat['data'][0,0]['event'][0,0]['eeg']['value']
    
    # 初始化堆叠数据的列表
    eeg_stack = []
    wavA_stack = []
    wavB_stack = []
#     event_stack = []
    # 遍历每个数据并堆叠
    for i in range(60):
        eeg_stack.append(eeg_data[0,i])
        wavA_stack.append(wavA_data[0,i])
        wavB_stack.append(wavB_data[0,i])
#         
    
    # 将列表转换为 numpy 数组，并调整形状
    eeg_stack = np.stack(eeg_stack, axis=0)  # (60, 6400, 66)
    wavA_stack = np.stack(wavA_stack, axis=0)  # (60, 800000, 1)
    wavB_stack = np.stack(wavB_stack, axis=0)  # (60, 800000, 1)
#     event_stack = np.array(event_stack).reshape(60, 1)
    
    # 调整 wavA 和 wavB 的形状为 (60, 1, 800000)
    eeg_stack = np.transpose(eeg_stack, (0, 2, 1))
    wavA_stack = np.transpose(wavA_stack, (0, 2, 1))
    wavB_stack = np.transpose(wavB_stack, (0, 2, 1))
    event_data = np.transpose(event_data, (1, 0))
    return eeg_stack, wavA_stack, wavB_stack, event_data


In [3]:
# 示例调用
filename = 'S17_data_preproc.mat'
eeg_stack, wavA_stack, wavB_stack, event_stack = load_mat_data(filename)

print("EEG stack shape:", eeg_stack.shape)      # 期望输出: (60, 66, 6400)
print("wavA stack shape:", wavA_stack.shape)    # 期望输出: (60, 1, 800000)
print("wavB stack shape:", wavB_stack.shape)    # 期望输出: (60, 1, 800000)
# print("Event stack shape:", event_stack)  # 期望输出: (60, 1)

EEG stack shape: (60, 66, 6400)
wavA stack shape: (60, 1, 800000)
wavB stack shape: (60, 1, 800000)


# 数据切片堆叠

In [3]:
import numpy as np

def slice_data(eeg_stack, wavA_stack, wavB_stack, event_stack, slice_length_sec):
    # 采样率
    eeg_sample_rate = 128
    wav_sample_rate = 16000

    # 计算切片步长
    eeg_slice_step = int(slice_length_sec * eeg_sample_rate)
    wav_slice_step = int(slice_length_sec * wav_sample_rate)
    
    # 初始化切片数据的列表
    eeg_slices = []
    wavA_slices = []
    wavB_slices = []
    event_slices = []
    
    # 遍历每个实验数据并进行切片
    num_trials = eeg_stack.shape[0]
    trial_length_eeg = eeg_stack.shape[2]
    trial_length_wav = wavA_stack.shape[2]
    
    for i in range(num_trials):
        for j in range(0, trial_length_eeg, eeg_slice_step):
            if j + eeg_slice_step <= trial_length_eeg:
                eeg_slices.append(eeg_stack[i, :, j:j+eeg_slice_step])
        
        for j in range(0, trial_length_wav, wav_slice_step):
            if j + wav_slice_step <= trial_length_wav:
                wavA_slices.append(wavA_stack[i, :, j:j+wav_slice_step])
                wavB_slices.append(wavB_stack[i, :, j:j+wav_slice_step])
        
        # 事件标签重复相应次数
        num_slices = trial_length_eeg // eeg_slice_step
        event_slices.extend([event_stack[i]] * num_slices)
    
    # 将列表转换为 numpy 数组，并调整形状
    eeg_slices = np.array(eeg_slices)  # (num_slices * num_trials, 66, eeg_slice_step)
    wavA_slices = np.array(wavA_slices)  # (num_slices * num_trials, 1, wav_slice_step)
    wavB_slices = np.array(wavB_slices)  # (num_slices * num_trials, 1, wav_slice_step)
    event_slices = np.array(event_slices).reshape(-1, 1)  # (num_slices * num_trials, 1)
    
    return eeg_slices, wavA_slices, wavB_slices, event_slices

In [52]:
# 示例调用
filename = 'S1_data_preproc.mat'
eeg_stack, wavA_stack, wavB_stack, event_stack = load_mat_data(filename)

slice_length_sec = 1  # 例如，切片长度为1秒
eeg_slices, wavA_slices, wavB_slices, event_slices = slice_data(eeg_stack, wavA_stack, wavB_stack, event_stack, slice_length_sec)

print("EEG slices shape:", eeg_slices.shape)      # 期望输出: (3000, 66, 6400)
print("wavA slices shape:", wavA_slices.shape)    # 期望输出: (3000, 1, 16000)
print("wavB slices shape:", wavB_slices.shape)    # 期望输出: (3000, 1, 16000)
print("Event slices shape:", event_slices.shape)  # 期望输出: (3000, 1)

EEG slices shape: (3000, 66, 128)
wavA slices shape: (3000, 1, 16000)
wavB slices shape: (3000, 1, 16000)
Event slices shape: (3000, 1)


In [None]:
# filename = 'S1_data_preproc.mat' 下一个是filename = 'S2_data_preproc.mat'。。。一直到filename = 'S18_data_preproc.mat'，

# 保存数据

## 单个被试

In [5]:
def process_and_save_all_files(slice_length_sec):
    root = '/home/test/Desktop/python/EEG_data/AAD_dataset/AAD_DTU/EEG'
    output_dir = '/home/test/Desktop/python/EEG_data/AAD_dataset/AAD_DTU/Processed/Dataset_0_5s'
    
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for i in range(1, 19):
        filename = f'S{i}_data_preproc.mat'
        eeg_stack, wavA_stack, wavB_stack, event_stack = load_mat_data(filename)
        
        eeg_slices, wavA_slices, wavB_slices, event_slices = slice_data(eeg_stack, wavA_stack, wavB_stack, event_stack, slice_length_sec)
        
        output_filename = f'S{i}_Dataset_{int(slice_length_sec)}s.npz'
        output_path = os.path.join(output_dir, output_filename)
        
        np.savez(output_path, eeg_slices=eeg_slices, wavA_slices=wavA_slices, wavB_slices=wavB_slices, event_slices=event_slices)
        print(f"Saved {output_filename}")

In [6]:
# 示例调用
slice_length_sec = 0.5  # 例如，切片长度为1秒
process_and_save_all_files(slice_length_sec)

Saved S1_Dataset_0s.npz
Saved S2_Dataset_0s.npz
Saved S3_Dataset_0s.npz
Saved S4_Dataset_0s.npz
Saved S5_Dataset_0s.npz
Saved S6_Dataset_0s.npz
Saved S7_Dataset_0s.npz
Saved S8_Dataset_0s.npz
Saved S9_Dataset_0s.npz
Saved S10_Dataset_0s.npz
Saved S11_Dataset_0s.npz
Saved S12_Dataset_0s.npz
Saved S13_Dataset_0s.npz
Saved S14_Dataset_0s.npz
Saved S15_Dataset_0s.npz
Saved S16_Dataset_0s.npz
Saved S17_Dataset_0s.npz
Saved S18_Dataset_0s.npz
