# 回归拟合-大脑对齐（fmri->imgfeat）

In [1]:
import os
# Print the parent of the process's current directory.
# NOTE(review): the label printed below reads "current working directory",
# but the value is the *parent* of the working directory -- confirm which
# one was intended.
current_dir = os.path.abspath(os.pardir)
print("当前工作目录:", current_dir)
# Switch into the encoding/ folder; the '../data/...' paths used in this
# notebook are relative to it.
os.chdir('/data/zlhu/NeuroAI/Eye-of-Brain/Brain-Diffusion/encoding')
# Confirm the switch took effect.
print("当前工作目录:", os.getcwd())

当前工作目录: /data
当前工作目录: /data/zlhu/NeuroAI/Eye-of-Brain/Brain-Diffusion/encoding


In [2]:
import argparse, os
import numpy as np
from himalaya.backend import set_backend
from himalaya.ridge import RidgeCV
from himalaya.scoring import correlation_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
# --- Run configuration ---
# Image feature to regress onto: 'init_latent' or 'c'.
target = "init_latent"
# Brain ROI(s) whose voxels are used as predictors ('early' or 'ventral').
roi = ["early"]
# Full ROI list, kept for reference (ROIs to be aligned):
# roi = ['early','midventral','midparietal','midlateral','ventral','parietal','lateral']

# Subject whose data is used in the cells below.
subject = "subj01"
# Select himalaya's NumPy backend.
backend = set_backend("numpy", on_error="warn")

In [4]:
# --- Data locations, relative to the working directory ---
mridir = f"../data/fmri/{subject}/"            # fMRI betas
featdir = f"../data/nsdfeat/{subject}_feat/"   # image features (regression targets)
savedir = f"../data/decoded/{subject}/"        # where predictions are written

# Create the output folder up front; no error if it already exists.
os.makedirs(savedir, exist_ok=True)

In [5]:
# Hyper-parameter grid: candidate regularisation strengths for the ridge model.
# Previously tried settings, kept for reference:
# alphas = [1e3, 3e3, 6e3, 9e3, 1e4, 3e4, 6e4, 9e4, 1e5]
# alpha = 5e4
# NOTE(review): the recorded run in this notebook reports a best alpha of 1
# (the largest candidate) for the displayed targets, which suggests the grid
# may need larger values -- confirm.
alphas = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]

In [6]:
# Algorithm: standardise the inputs, then fit a ridge regression whose
# regularisation strength is picked from `alphas`.

# Ridge regressor over the candidate alphas.
ridge = RidgeCV(alphas=alphas)

# Preprocessing stage: centre each input feature and scale it to unit variance.
scaler = StandardScaler(with_std=True, with_mean=True)
preprocess_pipeline = make_pipeline(scaler)

# Full model: preprocessing followed by the ridge regressor. The stages are
# kept nested so the step names of the resulting pipeline do not change.
pipeline = make_pipeline(preprocess_pipeline, ridge)

In [18]:
# Load the training and test data.
# X    : training samples (responses to subject-specific stimuli) -- voxel
#        betas of every listed ROI for every presentation (9000*3 = 27000).
# X_te : test samples (responses to the shared stimuli) -- averaged betas
#        of every listed ROI (1000).
# One array is loaded per ROI, then the ROIs are stacked column-wise.
X = np.hstack([
    np.load(f'{mridir}/{subject}_{croi}_betas_tr.npy').astype("float32")
    for croi in roi
])
X_te = np.hstack([
    np.load(f'{mridir}/{subject}_{croi}_betas_ave_te.npy').astype("float32")
    for croi in roi
])

# Target data (image features), flattened to one row per sample so the row
# count matches the corresponding brain matrix.
Y = np.load(f'{featdir}/{subject}_each_{target}_tr.npy').astype("float32")
Y = Y.reshape([X.shape[0], -1])
Y_te = np.load(f'{featdir}/{subject}_ave_{target}_te.npy').astype("float32")
Y_te = Y_te.reshape([X_te.shape[0], -1])

In [8]:
# Sanity check: report the shapes of the train/test inputs and targets.
shape_report = f'X {X.shape}, Y {Y.shape}, X_te {X_te.shape}, Y_te {Y_te.shape}'
print(shape_report)

X (27000, 5917), Y (27000, 6400), X_te (1000, 5917), Y_te (1000, 6400)


In [9]:
# Train the regression model: announce the configuration, then fit the
# pipeline that maps brain responses (X) to image features (Y).
banner = f'Now making decoding model for... {subject}:  {roi}, {target}'
print(banner)
pipeline.fit(X, Y)

Now making decoding model for... subj01:  ['early'], init_latent


In [19]:
# Predict image features for the held-out test responses and evaluate them.
brain_embs = pipeline.predict(X_te)
# Both arrays are transposed before scoring.
# NOTE(review): presumably this yields one correlation per test sample
# (computed across features) rather than one per feature -- confirm against
# himalaya's correlation_score axis convention.
rs = correlation_score(Y_te.T, brain_embs.T)
mean_r = np.mean(rs)
print(f'Prediction accuracy is: {mean_r:3.3}')

Prediction accuracy is: 0.251


In [20]:
# Eyeball the first test sample: input betas, predicted features, and
# ground-truth features, followed by the shape of the prediction matrix.
for first_row in (X_te[0], brain_embs[0], Y_te[0]):
    print(first_row)
print(brain_embs.shape)

[2493.3333   960.3333   440.      ... -262.66666 -229.66667 -205.66667]
[-0.07173416 -0.42249662  0.1324219  ... -0.04046825  0.23343459
  0.13494942]
[-0.95976335 -0.2181579  -1.7245413  ... -0.06844854  0.84640396
  0.431206  ]
(1000, 6400)


In [21]:
# Read the selected regularisation strength(s) from the fitted ridge step of
# the pipeline and print them.
fitted_ridge = pipeline.named_steps['ridgecv']
best_alpha = fitted_ridge.best_alphas_
print("Best alpha parameter:", best_alpha)

Best alpha parameter: [1. 1. 1. ... 1. 1. 1.]


In [22]:
# Save the predicted features; the file name encodes subject, ROI(s) and target.
roi_tag = "_".join(roi)
np.save(f'{savedir}/{subject}_{roi_tag}_brain_embs_{target}.npy', brain_embs)

### 下面是对齐后的解码

In [34]:
# --- Cross-subject decoding setup ---
# NOTE(review): presumably the target subject's responses were aligned to the
# source subject's space in an earlier step -- confirm.
source_subj = "subj01"
target_subj = "subj07"
roi = ["early"]  # ROI(s) to align

# Path settings
tardir = f"../data/fmri/{target_subj}"
braindir = f"../data/fmri_align/{target_subj}"


In [35]:
# Load the target subject's aligned brain responses for the selected ROI(s).
roi_tag = "_".join(roi)
brain_align = np.load(f'{braindir}/{target_subj}_{roi_tag}_brain_align2_{source_subj}.npy')

In [36]:
# Ground-truth image features for the target subject's test stimuli.
Y_te = np.load(f'../data/nsdfeat/{target_subj}_feat/{target_subj}_ave_{target}_te.npy').astype("float32")
# Flatten to one row per sample, as is done for the within-subject targets,
# so the array can be compared with the 2-D predictions even when the file
# stores the features with extra dimensions. No-op if it is already 2-D.
Y_te = Y_te.reshape([Y_te.shape[0], -1])
# Keep only the samples from index 800 onward.
# NOTE(review): presumably these are the samples that the aligned responses
# cover -- confirm that the row count matches `brain_align`.
Y_te = Y_te[800:]

In [37]:
# Decode the aligned responses with the already-fitted pipeline and score the
# result against the target subject's ground-truth features.
brain_embs = pipeline.predict(brain_align)
# Same transposed scoring call as in the within-subject evaluation.
rs = correlation_score(Y_te.T, brain_embs.T)
mean_r = np.mean(rs)
print(f'Prediction accuracy is: {mean_r:3.3}')
# Shape checks, handy when debugging:
# print(brain_embs.shape)
# print(brain_align.shape)
# print(Y_te.shape)

Prediction accuracy is: 0.0784


In [38]:
# Save the cross-subject predictions. Only the source subject's output folder
# is created earlier in the notebook, so create the target subject's folder
# here; otherwise np.save fails when the folder does not exist yet.
decoded_dir = f'../data/decoded/{target_subj}'
os.makedirs(decoded_dir, exist_ok=True)
np.save(f'{decoded_dir}/{target_subj}_to_{source_subj}_{"_".join(roi)}_brain_embs_{target}.npy',brain_embs)