# Load data and concatenate the trials of every word into one block

In [2]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import os
import sys
sys.path.append('../')
from ecog_band.utils import *
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
import torch
from torch.utils import data as Data
from sklearn.model_selection import GridSearchCV,train_test_split
import torch.nn as nn
import seaborn as sns
from matplotlib.colors import Normalize
from matplotlib.patches import Rectangle
import scipy.io as scio
import matplotlib.image as mpimg
import matplotlib as mpl
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Names of the band-filtered signals; each one selects a `HS<id>block_<band>.npy` file.
bands_list = [
    'theta',
    'alpha',
    'beta1',
    'beta2',
    'low gamma',
    'high gamma',
]
# Rate values in Hz; the processing code multiplies them by seconds to get
# sample indices and embeds them in the output file names.
freq_list = [400]
# Subject identifiers (the numeric part of the "HS<id>" file/folder names).
HS_list = [68, 69, 75, 79, 82, 83, 84, 85, 86]
# Global matplotlib font size for every figure drawn afterwards.
mpl.rcParams['font.size'] = 10

In [3]:
import numpy as np
import os

def process_and_save_data(HS_list, freq_list, bands_list, base_path, save_path):
    """Cut cue and read windows out of per-band ECoG block files and save them.

    For every subject / sampling-rate pair, each band file
    ``<base_path>/HS<subject>block_<band>.npy`` is loaded, the trials of all
    known words are rearranged to ``(trials, electrodes, time)``, and two
    time windows of ``0.95 * freq`` samples are extracted:

    * cue window:  starts at ``1 * freq - 0.2 * freq`` samples,
    * read window: starts ``2 * freq`` samples after the cue window start.

    The windows of all words are concatenated along the trial axis and stored
    per band in two dictionaries, which are written to
    ``<save_path>/HS<subject>/cue_allbands_<freq>hz.npy`` and
    ``<save_path>/HS<subject>/read_allbands_<freq>hz.npy``.

    Args:
        HS_list: Subject identifiers (the numeric part of ``HS<id>``).
        freq_list: Sampling rates in Hz. Each value converts the window
            offsets/lengths from seconds to samples and is embedded in the
            output file names.
        bands_list: Band names used to select the input files and as keys of
            the saved dictionaries.
        base_path: Directory containing the ``HS<id>block_<band>.npy`` files.
        save_path: Directory that receives the per-subject output folders
            (created if missing).

    Raises:
        ValueError: If a band file contains none of the expected words, so
            there is nothing to concatenate.
    """
    keys = ['功课', '力果', '宫客', '作业', '树叶', '对十', '数页', '绿草']

    # Make sure the output root exists before any result is written.
    os.makedirs(save_path, exist_ok=True)

    for HS in HS_list:
        for freq in freq_list:
            # Window boundaries in samples. Everything is derived from `freq`
            # so the windows stay aligned for sampling rates other than 400 Hz
            # (the start offset used to be hard-coded for 400 Hz).
            cue_start = int(1 * freq - 0.2 * freq)
            cue_stop = int(cue_start + 0.95 * freq)
            read_start = int(cue_start + 2 * freq)
            read_stop = int(cue_start + 2 * freq + 0.95 * freq)

            cue_data_dict = {}
            read_data_dict = {}

            for band in bands_list:
                # Load the block dictionary of this band.
                # NOTE(review): allow_pickle=True unpickles arbitrary objects;
                # only point this at trusted files.
                file_path = os.path.join(base_path, f"HS{HS}block_{band}.npy")
                block_data = np.load(file_path, allow_pickle=True).item()

                cue_trials = []
                read_trials = []
                for key in keys:
                    if key not in block_data:
                        # Words absent from this recording are skipped.
                        continue
                    # Per the original note, block_data[key] is a list with one
                    # (time, trials) array per electrode. Stack to
                    # (electrodes, trials, time), then reorder to
                    # (trials, electrodes, time).
                    epochs = np.array(
                        [elec_data.T for elec_data in block_data[key]]
                    ).transpose(1, 0, 2)
                    cue_trials.append(epochs[:, :, cue_start:cue_stop])
                    read_trials.append(epochs[:, :, read_start:read_stop])

                if not cue_trials:
                    # Previously this fell through and failed with an opaque
                    # AttributeError on `list.shape`.
                    raise ValueError(
                        f"No expected word found in {file_path}; "
                        f"available keys: {list(block_data)}"
                    )

                # Merge the trials of all words along the trial axis.
                cue_trials = np.concatenate(cue_trials, axis=0)
                read_trials = np.concatenate(read_trials, axis=0)

                print(f'cue trials shape for {band}: {cue_trials.shape}, '
                      f'read trials shape for {band}: {read_trials.shape}')

                # Store the processed arrays under their band name.
                cue_data_dict[band] = cue_trials
                read_data_dict[band] = read_trials

            # Save one file per condition holding all bands of this subject.
            os.makedirs(os.path.join(save_path, f'HS{HS}'), exist_ok=True)
            np.save(os.path.join(save_path, f"HS{HS}/cue_allbands_{freq}hz.npy"), cue_data_dict)
            np.save(os.path.join(save_path, f"HS{HS}/read_allbands_{freq}hz.npy"), read_data_dict)

            print(f"Processed and saved data for HS{HS}, freq {freq}")


In [4]:
# Folder holding the per-band block files (input) and folder for the
# windowed cue/read arrays (output).
base_path = '/public/DATA/overt_reading/Blocked_data/'
save_path = '/public/DATA/overt_reading/aligned_data/outpinyin'
os.makedirs(save_path, exist_ok=True)

# Run the preprocessing for every subject, rate and band configured above.
process_and_save_data(
    HS_list=HS_list,
    freq_list=freq_list,
    bands_list=bands_list,
    base_path=base_path,
    save_path=save_path,
)

cue trails shape for theta: (247, 256, 380), cue trails shape for bandtheta: (247, 256, 380)
cue trails shape for alpha: (247, 256, 380), cue trails shape for bandalpha: (247, 256, 380)
cue trails shape for beta1: (247, 256, 380), cue trails shape for bandbeta1: (247, 256, 380)
cue trails shape for beta2: (247, 256, 380), cue trails shape for bandbeta2: (247, 256, 380)
cue trails shape for low gamma: (247, 256, 380), cue trails shape for bandlow gamma: (247, 256, 380)
cue trails shape for high gamma: (247, 256, 380), cue trails shape for bandhigh gamma: (247, 256, 380)
Processed and saved data for HS68, freq 400
cue trails shape for theta: (271, 256, 380), cue trails shape for bandtheta: (271, 256, 380)
cue trails shape for alpha: (271, 256, 380), cue trails shape for bandalpha: (271, 256, 380)
cue trails shape for beta1: (271, 256, 380), cue trails shape for bandbeta1: (271, 256, 380)
cue trails shape for beta2: (271, 256, 380), cue trails shape for bandbeta2: (271, 256, 380)
cue trai

# Load the data of the listen condition

In [10]:
# Input folder with the per-band block files and output root for the stacked arrays.
base_path = '/public/DATA/overt_reading/Blocked_data/'
save_path = '/public/DATA/overt_reading/aligned_data/'

# Dictionary keys of the listen-condition entries collected from every block file.
keys = ['功课 listen', '力果 listen', '作业 listen', '树叶 listen', '对十 listen', '绿草 listen']

for HS in HS_list:
    for freq in freq_list:
        # One entry per band, filled below and saved once per subject/rate.
        listen_data_dict = {}
        for band in bands_list:
            # Load the block dictionary stored for this subject and band.
            file_path = os.path.join(base_path, f"HS{HS}block_{band}.npy")
            block_data = np.load(file_path, allow_pickle=True).item()

            # Move the last axis to the front for every word; per the original
            # note the stored arrays are (n_electrode, n_time, n_trials), which
            # yields (n_trials, n_electrode, n_time).
            per_word = [block_data[word].transpose(2, 0, 1) for word in keys]

            # Join all words along the first (trial) axis.
            block_data_all = np.concatenate(per_word, axis=0)
            print(f'{band} band data shape: {block_data_all.shape}')
            listen_data_dict[band] = block_data_all

        # Write all bands of this subject into a single file.
        subject_dir = os.path.join(save_path, f'HS{HS}')
        os.makedirs(subject_dir, exist_ok=True)
        np.save(os.path.join(save_path, f"HS{HS}/listen_allbands_{freq}hz.npy"), listen_data_dict)
            

theta band data shape: (216, 256, 800)
alpha band data shape: (216, 256, 800)
beta1 band data shape: (216, 256, 800)
beta2 band data shape: (216, 256, 800)
low gamma band data shape: (216, 256, 800)
high gamma band data shape: (216, 256, 800)
theta band data shape: (216, 256, 800)
alpha band data shape: (216, 256, 800)
beta1 band data shape: (216, 256, 800)
beta2 band data shape: (216, 256, 800)
low gamma band data shape: (216, 256, 800)
high gamma band data shape: (216, 256, 800)
theta band data shape: (216, 256, 800)
alpha band data shape: (216, 256, 800)
beta1 band data shape: (216, 256, 800)
beta2 band data shape: (216, 256, 800)
low gamma band data shape: (216, 256, 800)
high gamma band data shape: (216, 256, 800)
theta band data shape: (180, 256, 800)
alpha band data shape: (180, 256, 800)
beta1 band data shape: (180, 256, 800)
beta2 band data shape: (180, 256, 800)
low gamma band data shape: (180, 256, 800)
high gamma band data shape: (180, 256, 800)
theta band data shape: (216,