In [1]:
import os
import random
from sklearn.model_selection import train_test_split

In [2]:
# 定义电极区域划分，将电极名称分组到不同的区域
electrode_regions = {'Frontal': ['Fp1', 'Fp2', 'F3', 'F4', 'F7', 'F8'],
                     'Central': ['C3', 'C4'],
                     'Parietal': ['P3', 'P4'],
                     'Occipital': ['O1', 'O2'],
                     'Temporal': ['T3', 'T4', 'T5', 'T6']}
# 哈咯好咯，修改一下试试

In [3]:
# 设置文件夹路径和类别名称
folders = [r'D:\PycharmProjects\eegProject\data\Test_EEG\HC',
           r'D:\PycharmProjects\eegProject\data\Test_EEG\MDD',
           r'D:\PycharmProjects\eegProject\data\Test_EEG\BD']
class_names = ['HC', 'MDD', 'BD']

# 定义划分比例
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

# 创建存储数据集的字典
data_splits = {'train': [], 'val': [], 'test': []}

# 遍历每个类别的文件夹
for folder, class_name in zip(folders, class_names):
    # 获取文件夹中的Clean.mat文件列表
    file_list = os.listdir(folder)
    file_list = [os.path.join(folder, file) for file in file_list if file.endswith('Clean.mat')]

    # 随机划分数据
    train_files, remaining_files = train_test_split(file_list, train_size=train_ratio, random_state=42)
    val_files, test_files = train_test_split(remaining_files, test_size=test_ratio / (test_ratio + val_ratio),
                                             random_state=42)

    # 将数据加入到对应的数据集中
    data_splits['train'].extend([(file, class_name) for file in train_files])
    data_splits['val'].extend([(file, class_name) for file in val_files])
    data_splits['test'].extend([(file, class_name) for file in test_files])

# 打印每个划分的数据统计信息
for split, files in data_splits.items():
    print(f"{split} set: {len(files)} samples")

train set: 171 samples
val set: 57 samples
test set: 58 samples


In [4]:
def calculate_band_power(freqs, psd, freq_start, freq_end):
    # 确定频率范围索引
    freq_start_idx = np.argmax(freqs >= freq_start)
    freq_end_idx = np.argmax(freqs >= freq_end)

    # 提取频率范围内的功率谱密度
    selected_psd = psd[:, freq_start_idx:freq_end_idx+1]

    # 计算频带能量（求和）
    band_power = np.sum(selected_psd)  # 或使用 np.mean(selected_psd) 求平均值

    return band_power

In [5]:
label_dic = {'HC':0, 'MDD':1, "BD":2}

In [6]:
import numpy as np
from numpy import *
import mne
import scipy.io as sio
import csv

for sub, label in data_splits['train']:  # 被试循环

    data = sio.loadmat(sub)
    sample = data['EEG_ECClean']
    eeg_data = sample["data"]
    ch_names = sample['chanlocs'][0][0][0]['labels']
    ch_names = [item[0] for item in ch_names]
    info = mne.create_info(
        ch_names=['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6'],
        ch_types='eeg',
        sfreq=sample["srate"])
    raw = mne.io.RawArray(eeg_data[0][0], info)
    raw_res = raw.resample(200)

    # 提取电极区域的时序信息和频域信息
    region_data = {}

    for region, electrodes in electrode_regions.items():

        # 提取电极区域的数据
        region_eeg_data, _ = raw_res[electrodes, :]

        # 计算时序信息（均值）
        region_time_series = region_eeg_data.mean(axis=0)  # 根据需要进行修改

        for i in range (10):  # 时序信息分为十个时间窗口
            region_time_series_list = []

            for j in range(5):
                region_time_series_list.append(mean(region_time_series[int(j/5*len(region_time_series)):int((j+1)/5*len(region_time_series))]))

            region_eeg_data_win = region_eeg_data[:, int(i/10*len(region_time_series)):int((i+1)/10*len(region_time_series))]

            # 计算频域信息（功率谱密度）
            info = mne.create_info(
                ch_names=electrodes,
                ch_types='eeg',
                sfreq=sample["srate"]
            )
            region_raw = mne.io.RawArray(region_eeg_data_win, info)

            region_psd, region_freqs = mne.time_frequency.psd_array_welch(region_raw.get_data(), sfreq=200)

            # 计算不同频段的频带能量，要有重叠
            # Delta波1-3Hz
            Delta = calculate_band_power(region_freqs, region_psd, 1, 3)
            # Theta波4-7Hz
            Theta = calculate_band_power(region_freqs, region_psd, 4, 7)
            # Alpha波9-13Hz
            Alpha = calculate_band_power(region_freqs, region_psd, 9, 13)
            # Beta波14-29Hz
            Beta = calculate_band_power(region_freqs, region_psd, 14, 29)
            # Gamma波30-48Hz
            Gamma = calculate_band_power(region_freqs, region_psd, 30, 48)

            # 存储时序信息和频域信息
            region_data[region] = {'Delta': Delta,
                                   'Theta': Theta,
                                   'Alpha': Alpha,
                                   'Beta': Beta,
                                   'Gamma': Gamma,
                                   'time_series_0': region_time_series_list[0],
                                   'time_series_1': region_time_series_list[1],
                                   'time_series_2': region_time_series_list[2],
                                   'time_series_3': region_time_series_list[3],
                                   'time_series_4': region_time_series_list[4],
                                   }

            # 定义保存文件的路径
            save_path = r'D:\PycharmProjects\eegProject\extract_data\train\{}_region_data_{}_{}.csv'.format(label, sub[48:-16], i,)

            # 打开文件进行写入
            with open(save_path, 'w', newline='') as file:
                writer = csv.writer(file)

                # 写入表头
                writer.writerow(['Region', 'Time Series_0', 'Time Series_1', 'Time Series_2', 'Time Series_3', 'Time Series_4', 'Delta', 'Theta', 'Alpha', 'Beta', 'Gamma', 'Label'])

                # 遍历每个区域并写入数据
                for region, data in region_data.items():
                    region_time_series_0 = data['time_series_0']
                    region_time_series_1 = data['time_series_1']
                    region_time_series_2 = data['time_series_2']
                    region_time_series_3 = data['time_series_3']
                    region_time_series_4 = data['time_series_4']
                    region_Delta = data['Delta']
                    region_Theta = data['Theta']
                    region_Alpha = data['Alpha']
                    region_Beta = data['Beta']
                    region_Gamma = data['Gamma']

                    # 写入每行数据
                    writer.writerow(
                        [region + '_' + sub[48:-16], region_time_series_0, region_time_series_1, region_time_series_2, region_time_series_3, region_time_series_4, region_Delta, region_Theta, region_Alpha, region_Beta, region_Gamma, label_dic[label]])

Creating RawArray with float64 data, n_channels=16, n_times=34448
    Range : 0 ... 34447 =      0.000 ...   172.235 secs
Ready.
Creating RawArray with float64 data, n_channels=6, n_times=3444
    Range : 0 ... 3443 =      0.000 ...    17.215 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3445
    Range : 0 ... 3444 =      0.000 ...    17.220 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3445
    Range : 0 ... 3444 =      0.000 ...    17.220 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3445
    Range : 0 ... 3444 =      0.000 ...    17.220 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3445
    Range : 0 ... 3444 =      0.000 ...    17.220 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3444
    Ran

In [7]:
for sub, label in data_splits['val']:  # 被试循环

    data = sio.loadmat(sub)
    sample = data['EEG_ECClean']
    eeg_data = sample["data"]
    ch_names = sample['chanlocs'][0][0][0]['labels']
    ch_names = [item[0] for item in ch_names]
    info = mne.create_info(
        ch_names=['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6'],
        ch_types='eeg',
        sfreq=sample["srate"])
    raw = mne.io.RawArray(eeg_data[0][0], info)
    raw_res = raw.resample(200)

    # 提取电极区域的时序信息和频域信息
    region_data = {}

    for region, electrodes in electrode_regions.items():

        # 提取电极区域的数据
        region_eeg_data, _ = raw_res[electrodes, :]

        # 计算时序信息（均值）
        region_time_series = region_eeg_data.mean(axis=0)  # 根据需要进行修改

        for i in range (10):  # 时序信息分为十个时间窗口
            region_time_series_list = []

            for j in range(5):
                region_time_series_list.append(mean(region_time_series[int(j/5*len(region_time_series)):int((j+1)/5*len(region_time_series))]))

            region_eeg_data_win = region_eeg_data[:, int(i/10*len(region_time_series)):int((i+1)/10*len(region_time_series))]

            # 计算频域信息（功率谱密度）
            info = mne.create_info(
                ch_names=electrodes,
                ch_types='eeg',
                sfreq=sample["srate"]
            )
            region_raw = mne.io.RawArray(region_eeg_data_win, info)

            region_psd, region_freqs = mne.time_frequency.psd_array_welch(region_raw.get_data(), sfreq=200)

            # 计算不同频段的频带能量
            # Delta波1-3Hz
            Delta = calculate_band_power(region_freqs, region_psd, 1, 3)
            # Theta波4-7Hz
            Theta = calculate_band_power(region_freqs, region_psd, 4, 7)
            # Alpha波9-13Hz
            Alpha = calculate_band_power(region_freqs, region_psd, 9, 13)
            # Beta1波14-29Hz
            Beta = calculate_band_power(region_freqs, region_psd, 14, 29)
            # Gamma1波30-48Hz
            Gamma = calculate_band_power(region_freqs, region_psd, 30, 48)

            # 存储时序信息和频域信息
            region_data[region] = {'Delta': Delta,
                                   'Theta': Theta,
                                   'Alpha': Alpha,
                                   'Beta': Beta,
                                   'Gamma': Gamma,
                                   'time_series_0': region_time_series_list[0],
                                   'time_series_1': region_time_series_list[1],
                                   'time_series_2': region_time_series_list[2],
                                   'time_series_3': region_time_series_list[3],
                                   'time_series_4': region_time_series_list[4],
                                   }

            # 定义保存文件的路径
            save_path = r'D:\PycharmProjects\eegProject\extract_data\val\{}_region_data_{}_{}.csv'.format(label, sub[48:-16], i,)

            # 打开文件进行写入
            with open(save_path, 'w', newline='') as file:
                writer = csv.writer(file)

                # 写入表头
                writer.writerow(['Region', 'Time Series_0', 'Time Series_1', 'Time Series_2', 'Time Series_3', 'Time Series_4', 'Delta', 'Theta', 'Alpha', 'Beta', 'Gamma', 'Label'])

                # 遍历每个区域并写入数据
                for region, data in region_data.items():
                    region_time_series_0 = data['time_series_0']
                    region_time_series_1 = data['time_series_1']
                    region_time_series_2 = data['time_series_2']
                    region_time_series_3 = data['time_series_3']
                    region_time_series_4 = data['time_series_4']
                    region_Delta = data['Delta']
                    region_Theta = data['Theta']
                    region_Alpha = data['Alpha']
                    region_Beta = data['Beta']
                    region_Gamma = data['Gamma']

                    # 写入每行数据
                    writer.writerow(
                        [region + '_' + sub[48:-16], region_time_series_0, region_time_series_1, region_time_series_2, region_time_series_3, region_time_series_4, region_Delta, region_Theta, region_Alpha, region_Beta, region_Gamma, label_dic[label]])

Creating RawArray with float64 data, n_channels=16, n_times=34520
    Range : 0 ... 34519 =      0.000 ...   172.595 secs
Ready.
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Range : 0 ... 3451 =      0.000 ...    17.255 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Range : 0 ... 3451 =      0.000 ...    17.255 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Range : 0 ... 3451 =      0.000 ...    17.255 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Range : 0 ... 3451 =      0.000 ...    17.255 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Range : 0 ... 3451 =      0.000 ...    17.255 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3452
    Ran

In [8]:
for sub, label in data_splits['test']:  # 被试循环

    data = sio.loadmat(sub)
    sample = data['EEG_ECClean']
    eeg_data = sample["data"]
    ch_names = sample['chanlocs'][0][0][0]['labels']
    ch_names = [item[0] for item in ch_names]
    info = mne.create_info(
        ch_names=['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6'],
        ch_types='eeg',
        sfreq=sample["srate"])
    raw = mne.io.RawArray(eeg_data[0][0], info)
    raw_res = raw.resample(200)

    # 提取电极区域的时序信息和频域信息
    region_data = {}

    for region, electrodes in electrode_regions.items():

        # 提取电极区域的数据
        region_eeg_data, _ = raw_res[electrodes, :]

        # 计算时序信息（均值）
        region_time_series = region_eeg_data.mean(axis=0)  # 根据需要进行修改

        for i in range (10):  # 时序信息分为十个时间窗口
            region_time_series_list = []

            for j in range(5):
                region_time_series_list.append(mean(region_time_series[int(j/5*len(region_time_series)):int((j+1)/5*len(region_time_series))]))

            region_eeg_data_win = region_eeg_data[:, int(i/10*len(region_time_series)):int((i+1)/10*len(region_time_series))]

            # 计算频域信息（功率谱密度）
            info = mne.create_info(
                ch_names=electrodes,
                ch_types='eeg',
                sfreq=sample["srate"]
            )
            region_raw = mne.io.RawArray(region_eeg_data_win, info)

            region_psd, region_freqs = mne.time_frequency.psd_array_welch(region_raw.get_data(), sfreq=200)

            # 计算不同频段的频带能量
            # Delta波1-3Hz
            Delta = calculate_band_power(region_freqs, region_psd, 1, 3)
            # Theta波4-7Hz
            Theta = calculate_band_power(region_freqs, region_psd, 4, 7)
            # Alpha波9-13Hz
            Alpha = calculate_band_power(region_freqs, region_psd, 9, 13)
            # Beta1波14-29Hz
            Beta = calculate_band_power(region_freqs, region_psd, 14, 29)
            # Gamma1波30-48Hz
            Gamma = calculate_band_power(region_freqs, region_psd, 30, 48)

            # 存储时序信息和频域信息
            region_data[region] = {'Delta': Delta,
                                   'Theta': Theta,
                                   'Alpha': Alpha,
                                   'Beta': Beta,
                                   'Gamma': Gamma,
                                   'time_series_0': region_time_series_list[0],
                                   'time_series_1': region_time_series_list[1],
                                   'time_series_2': region_time_series_list[2],
                                   'time_series_3': region_time_series_list[3],
                                   'time_series_4': region_time_series_list[4],
                                   }

            # 定义保存文件的路径
            save_path = r'D:\PycharmProjects\eegProject\extract_data\test\{}_region_data_{}_{}.csv'.format(label, sub[48:-16], i,)

            # 打开文件进行写入
            with open(save_path, 'w', newline='') as file:
                writer = csv.writer(file)

                # 写入表头
                writer.writerow(['Region', 'Time Series_0', 'Time Series_1', 'Time Series_2', 'Time Series_3', 'Time Series_4', 'Delta', 'Theta', 'Alpha', 'Beta', 'Gamma', 'Label'])

                # 遍历每个区域并写入数据
                for region, data in region_data.items():
                    region_time_series_0 = data['time_series_0']
                    region_time_series_1 = data['time_series_1']
                    region_time_series_2 = data['time_series_2']
                    region_time_series_3 = data['time_series_3']
                    region_time_series_4 = data['time_series_4']
                    region_Delta = data['Delta']
                    region_Theta = data['Theta']
                    region_Alpha = data['Alpha']
                    region_Beta = data['Beta']
                    region_Gamma = data['Gamma']

                    # 写入每行数据
                    writer.writerow(
                        [region + '_' + sub[48:-16], region_time_series_0, region_time_series_1, region_time_series_2, region_time_series_3, region_time_series_4, region_Delta, region_Theta, region_Alpha, region_Beta, region_Gamma, label_dic[label]])

Creating RawArray with float64 data, n_channels=16, n_times=33248
    Range : 0 ... 33247 =      0.000 ...   166.235 secs
Ready.
Creating RawArray with float64 data, n_channels=6, n_times=3324
    Range : 0 ... 3323 =      0.000 ...    16.615 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3325
    Range : 0 ... 3324 =      0.000 ...    16.620 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3325
    Range : 0 ... 3324 =      0.000 ...    16.620 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3325
    Range : 0 ... 3324 =      0.000 ...    16.620 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3325
    Range : 0 ... 3324 =      0.000 ...    16.620 secs
Ready.
Effective window size : 1.280 (s)
Creating RawArray with float64 data, n_channels=6, n_times=3324
    Ran