# fMRI数据预处理

In [1]:
import os
# Resolve a starting directory for reference.
# NOTE(review): os.path.abspath('') is the current working directory, so
# dirname() of it is the *parent* of the working directory, even though the
# label printed below reads "current working directory" — confirm intent.
current_dir = os.path.dirname(os.path.abspath(''))
print("当前工作目录:", current_dir)
# Switch to the project's encoding directory; the relative '../data/...'
# paths used by the later cells are resolved against it.
os.chdir('/data/zlhu/NeuroAI/Eye-of-Brain/Brain-Diffusion/encoding')
# Confirm the working directory after the change.
print("当前工作目录:", os.getcwd())

当前工作目录: /data
当前工作目录: /data/zlhu/NeuroAI/Eye-of-Brain/Brain-Diffusion/encoding


In [2]:
import argparse
import os
import numpy as np
import pandas as pd
from nsd_access import NSDAccess
import scipy.io

In [3]:
# IPython shell escape (not plain Python): print the working directory.
!pwd

/data/zlhu/NeuroAI/Eye-of-Brain/Brain-Diffusion/encoding


In [26]:
# Global parameters
subject = "subj07"   # subj01, subj02, subj05 or subj07 (the full-data subjects)
atlasname = 'streams'  # atlas name handed to nsda.read_atlas_results

In [27]:
# Output directory for this subject's processed fMRI arrays
savedir = f'../data/fmri/{subject}/'
os.makedirs(savedir, exist_ok=True)

# Accessor rooted at the NSD source data directory
nsda = NSDAccess('../data/nsd/')

# Load the stimulus arrays for this subject.
# NOTE(review): presumably *_stims.npy holds one stimulus id per trial and
# *_stims_ave.npy the unique stimulus ids (inferred from how the two arrays
# are used further down) — verify against the script that produced them.
stims_unique = np.load(f'../data/stim/{subject}/{subject}_stims_ave.npy')
stims_all = np.load(f'../data/stim/{subject}/{subject}_stims.npy')

# Load the NSD experiment design information.
# NOTE(review): makedirs only guarantees that the directory exists; loadmat
# still fails if nsd_expdesign.mat itself is missing.
os.makedirs('../data/nsd/nsddata/experiments/nsd', exist_ok=True)
nsd_expdesign = scipy.io.loadmat('../data/nsd/nsddata/experiments/nsd/nsd_expdesign.mat')
# The indices in nsd_expdesign are 1-based; subtract 1 to match Python's 0-based indexing
sharedix = nsd_expdesign['sharedix'] -1 

In [28]:
# Source directory of the fMRI betas for this subject
fmri_path = f'../data/nsd/nsddata_betas/ppdata/{subject}/func1pt8mm/betas_fithrf_GLMdenoise_RR'
# Create the directory if it does not exist yet
os.makedirs(fmri_path, exist_ok=True)

In [7]:
# Load the betas (fMRI response data) of every session and stack them along
# axis 0. The loop reads sessions 1..40, i.e. it expects a subject for which
# all 40 sessions are available.
session_betas = []
for i in range(1, 41):
    print(f"now processing {subject} - session{i}")
    beta_trial = nsda.read_betas(subject=subject,
                                 session_index=i,
                                 trial_index=[],  # empty list as index means get all for this session
                                 data_type='betas_fithrf_GLMdenoise_RR',
                                 data_format='func1pt8mm')
    session_betas.append(beta_trial)

# Concatenate once at the end: concatenating inside the loop would re-copy
# every previously loaded session on each iteration.
betas_all = np.concatenate(session_betas, 0)
# Drop the per-session list so only the stacked array keeps the data alive.
del session_betas

now processing subj07 - session1
now processing subj07 - session2
now processing subj07 - session3
now processing subj07 - session4
now processing subj07 - session5
now processing subj07 - session6
now processing subj07 - session7
now processing subj07 - session8
now processing subj07 - session9
now processing subj07 - session10
now processing subj07 - session11
now processing subj07 - session12
now processing subj07 - session13
now processing subj07 - session14
now processing subj07 - session15
now processing subj07 - session16
now processing subj07 - session17
now processing subj07 - session18
now processing subj07 - session19
now processing subj07 - session20
now processing subj07 - session21
now processing subj07 - session22
now processing subj07 - session23
now processing subj07 - session24
now processing subj07 - session25
now processing subj07 - session26
now processing subj07 - session27
now processing subj07 - session28
now processing subj07 - session29
now processing subj07 -

In [8]:
# Make sure the ROI source directories exist: the fsaverage label directory
# and this subject's func1pt8mm ROI directory.
for roi_source_dir in (
    '../data/nsd/nsddata/freesurfer/fsaverage/label',
    f'../data/nsd/nsddata/ppdata/{subject}/func1pt8mm/roi',
):
    os.makedirs(roi_source_dir, exist_ok=True)

In [9]:
# Read the atlas for this subject. As used below, atlas[0] is the label volume
# and atlas[1] maps each ROI name to its label value.
atlas = nsda.read_atlas_results(subject=subject, atlas=atlasname, data_format='func1pt8mm')

# Reorder the atlas axes once so that the volume can be used as a boolean mask
# over the trailing axes of betas_all.
# NOTE(review): presumably the betas volumes have their spatial axes reversed
# relative to the atlas — confirm against nsd_access.
atlas_volume = atlas[0].transpose([2, 1, 0])

# Everything below depends only on the stimulus ids, not on the ROI, so it is
# computed once here instead of once per ROI.
# Row indices (along axis 0 of the betas) of the trials showing each unique stimulus.
trials_per_stim = [np.flatnonzero(stims_all == stim) for stim in stims_unique]
# True where the stimulus is one of the shared images; those form the test set.
is_test_trial = np.isin(stims_all, sharedix)
is_test_stim = np.isin(stims_unique, sharedix)

# Process every ROI of the atlas.
for roi, val in atlas[1].items():
    print(roi,val)
    # Label value 0 is skipped (it is reported as 'Unknown' in the atlas mapping).
    if val == 0:
        print('SKIP')
        continue

    # Keep only the voxels that belong to this ROI.
    betas_roi = betas_all[:, atlas_volume == val]

    # Shape of the ROI betas (number of samples x number of voxels)
    print(betas_roi.shape)

    # Average the betas over the repeated presentations of each unique stimulus
    # (the original note states that each stimulus was presented 3 times).
    betas_roi_ave = np.stack(
        [np.mean(betas_roi[rows, :], axis=0) for rows in trials_per_stim]
    )

    # Shape of the ROI betas after averaging
    print(betas_roi_ave.shape)

    # Train/test split of the single-trial betas: shared images -> test set.
    # Boolean-mask indexing keeps the original row order.
    betas_tr = betas_roi[~is_test_trial]
    betas_te = betas_roi[is_test_trial]

    # Same split for the stimulus-averaged betas.
    betas_ave_tr = betas_roi_ave[~is_test_stim]
    betas_ave_te = betas_roi_ave[is_test_stim]

    # Save the train and test arrays for both the single-trial and the averaged betas.
    np.save(f'{savedir}/{subject}_{roi}_betas_tr.npy',betas_tr)
    np.save(f'{savedir}/{subject}_{roi}_betas_te.npy',betas_te)
    np.save(f'{savedir}/{subject}_{roi}_betas_ave_tr.npy',betas_ave_tr)
    np.save(f'{savedir}/{subject}_{roi}_betas_ave_te.npy',betas_ave_te)

Unknown 0
SKIP
early 1
(30000, 4559)
(10000, 4559)
midventral 2
(30000, 785)
(10000, 785)
midlateral 3
(30000, 628)
(10000, 628)
midparietal 4
(30000, 692)
(10000, 692)
ventral 5
(30000, 6515)


(10000, 6515)
lateral 6
(30000, 6118)
(10000, 6118)
parietal 7
(30000, 2459)
(10000, 2459)


In [10]:
# Sanity check of the split sizes. The arrays printed here are the ones left
# over from the last ROI handled by the ROI loop.
# English summary of the note in the string below: per subject there are 9000
# subject-specific stimuli x 3 repeats = 27000 betas and 1000 shared stimuli
# x 3 repeats = 3000 betas, i.e. 30000 trials in total and 10000 (9000 + 1000)
# after averaging over repeats.
'''
对每个个体的每个大脑体素做计算
每个subj特有刺激9000个，重复3次，共27000次大脑响应betas
共有刺激1000个，重复3次，共3000次大脑响应betas

全部刺激27000+3000 = 30000次betas
平均刺激30000/3 = 10000  (9000+1000) 次betas
'''
print(betas_tr.shape)
print(betas_te.shape)
print(betas_ave_tr.shape)
print(betas_ave_te.shape)

(27000, 2459)
(3000, 2459)
(9000, 2459)
(1000, 2459)
