In [1]:
import nibabel as nib
import numpy as np
from os.path import join
import math
from scipy.spatial.distance import cdist
from scipy.sparse import csgraph
import cvxpy as cp
from sklearn.model_selection import KFold
from itertools import product
import h5py


In [3]:
import hdf5storage

# Recording to load: subject, session and run identifiers used in the filename.
sub = '04'
ses = 1
run = 1

# Read only the 'data' variable from the .mat file and drop singleton dimensions.
mat = hdf5storage.loadmat(
    f'fmri_sub{sub}_ses{ses}_run{run}.mat',
    squeeze_me=True,
    variable_names=['data'],
)
bold_data = mat['data']

# Report what was loaded; the shape prints as None if the object has no `shape`.
print(type(bold_data), getattr(bold_data, 'shape', None))


KeyboardInterrupt: 

In [None]:
# Recording identifiers used to build every path in this cell.
ses = 1
sub = '04'
run = 1

base_path = '/mnt/TeamShare/Data_Masterfile/H20-00572_All-Dressed/PRECISIONSTIM_PD_Data_Results/fMRI_preprocessed_data/Rev_pipeline/derivatives'

# Anatomy folder of this subject/session and the filename stem shared by the
# T1w-derived images (brain, brain mask, pve_0 map).
anat_dir = join(base_path, f'sub-pd0{sub}', f'ses-{ses}', 'anat')
anat_prefix = f'sub-pd0{sub}_ses-{ses}_T1w_brain'
anat_img = nib.load(join(anat_dir, f'{anat_prefix}.nii.gz'))

data_name = f'fmri_sub{sub}_ses{ses}_run{run}.mat'
# The BOLD .mat file is read from the current working directory.
# Use join(base_path, data_name) instead to read it from the derivatives tree.
BOLD_path_org = data_name

with h5py.File(BOLD_path_org, 'r') as mat_file:
    bold_data = mat_file['data'][()]

# The array read through h5py has time as its leading axis; reverse all four
# axes so that time comes last, aligning it with the beta volumes.
bold_data = np.transpose(np.asarray(bold_data), (3, 2, 1, 0))

# The masking cell below reads `back_mask` and `csf_mask`, so they must be
# loaded here for the notebook to survive Restart & Run All.
mask_path = join(anat_dir, f'{anat_prefix}_mask.nii.gz')
back_mask = nib.load(mask_path)
mask_path = join(anat_dir, f'{anat_prefix}_pve_0.nii.gz')
csf_mask = nib.load(mask_path)

# Shape check: the masks are later used to index the spatial axes of bold_data.
print(anat_img.shape, bold_data.shape, back_mask.shape, csf_mask.shape)




In [None]:
# Keep voxels that are inside the brain mask and have a zero value in csf_mask.
back_mask_data = back_mask.get_fdata() > 0
csf_mask_data = csf_mask.get_fdata() > 0
mask = np.logical_and(back_mask_data, ~csf_mask_data)
nonzero_mask = np.where(mask)
masked_bold = bold_data[nonzero_mask]

# Share of the spatial grid that survives masking (a percentage, not a count).
selected_pct = masked_bold.shape[0] / math.prod(bold_data.shape[:3]) * 100
print(f"percentage of selected voxels after masking: {selected_pct:.2f}%")
print('bold_data masked shape:', masked_bold.shape)

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects and can
# execute code embedded in the file -- only load this from a trusted location.
glm_dict = np.load('/scratch/st-mmckeown-1/zkavian/fmri_models/TYPED_FITHRF_GLMDENOISE_RR.npy', allow_pickle=True).item()
beta_glm = glm_dict['betasmd']

# Split the last axis at index 90 into the two runs
# (presumably 90 trials per run -- TODO confirm).
beta_run1, beta_run2 = beta_glm[:, 0, 0, :90], beta_glm[:, 0, 0, 90:]
R2_run1, R2_run2 = glm_dict['R2run'][:, :, :, 0], glm_dict['R2run'][:, :, :, 1]

# Work on run 1 only.
beta = beta_run1
R2 = R2_run1

# Flag betas outside the 1st-99th percentile range (NaNs ignored).
lower_thr, upper_thr = np.nanpercentile(beta, [1, 99])
print(f'low_thr: {lower_thr:.2f}, high_thr: {upper_thr:.2f}')  # previously observed: low_thr: -4.64, high_thr: 4.60
beta_extreme_mask = np.logical_or(beta < lower_thr, beta > upper_thr)

# A voxel counts as extreme if any of its trials is outside the thresholds.
voxels_with_extreme_beta = np.any(beta_extreme_mask, axis=1)
print(f"percentage of voxels with extreme beta values: {np.sum(voxels_with_extreme_beta)/beta.shape[0]*100:.2f}%")

In [None]:
def calculate_matrices(beta_valume_clean_2d, bold_data, anat_img, mask_2d, trial_indices=None, trial_len=9,
                       gap_starts=(270, 560), gap_len=20):
    """Build the task, variability and smoothness penalties for a set of trials.

    Parameters
    ----------
    beta_valume_clean_2d : ndarray, indexed as [voxel, trial]
        Trial-wise betas; the last axis is the trial axis. The rows are
        presumably the voxels kept by ``~mask_2d``, in the same order --
        TODO confirm against the caller.
    bold_data : ndarray
        BOLD array whose first three axes are spatial and whose last axis is time.
    anat_img : object with an ``affine`` attribute
        Its affine maps voxel indices to world coordinates.
    mask_2d : 1-D bool ndarray
        Flattened spatial mask; ``True`` marks voxels to EXCLUDE.
    trial_indices : array-like of int, optional
        Trials to use. Duplicates are removed and the indices are sorted.
        ``None`` selects every trial.
    trial_len : int
        Number of consecutive timepoints that make up one trial.
    gap_starts : iterable of int
        Timepoint offsets at which ``gap_len`` timepoints are skipped. A gap is
        applied only when a trial ends exactly on one of these offsets.
    gap_len : int
        Number of timepoints skipped at each gap.

    Returns
    -------
    L_task : 1-D ndarray
        ``1 / |mean beta|`` per voxel over the selected trials; 0 where the
        mean is zero or not finite.
    L_var : 2-D float32 ndarray
        Voxel-by-voxel covariance of the trial-to-trial BOLD differences,
        symmetrised and with ``1e-6`` added on the diagonal.
    L_smooth : 2-D ndarray
        Unnormalised graph Laplacian of a Gaussian kernel on the pairwise
        world-space voxel distances.
    selected_BOLD_flat : 2-D ndarray
        Selected trials' BOLD windows, flattened to [voxel, trial * trial_len].

    Raises
    ------
    ValueError
        If the BOLD series is too short to hold every trial, or if the selected
        trials leave fewer than two observations for the covariance.
    """
    num_trials = beta_valume_clean_2d.shape[-1]
    if trial_indices is None:
        trial_idx = np.arange(num_trials)
    else:
        trial_idx = np.unique(np.asarray(trial_indices, dtype=int).ravel())

    # ---- Task term: inverse magnitude of the mean beta -------------------
    beta_selected = beta_valume_clean_2d[:, trial_idx]
    mean_beta_filtered = np.nanmean(beta_selected, axis=-1)
    L_task = np.zeros_like(mean_beta_filtered)
    # A voxel whose selected trials are all NaN has a NaN mean. Leave its
    # weight at 0 instead of NaN: the optimiser already maps NaN to 0, and this
    # keeps a validation objective computed from L_task finite.
    # NOTE(review): a zero penalty makes zero/NaN-response voxels the cheapest
    # to select -- confirm this is intended rather than a large penalty.
    usable = np.isfinite(mean_beta_filtered) & (mean_beta_filtered != 0)
    np.divide(1.0, np.abs(mean_beta_filtered), out=L_task, where=usable)

    # ---- Cut the BOLD series of the kept voxels into per-trial windows ---
    bold_data_reshape = np.reshape(bold_data, (-1, bold_data.shape[-1]))
    bold_data_selected = bold_data_reshape[~mask_2d]
    n_timepoints = bold_data_selected.shape[1]

    gap_offsets = frozenset(int(g) for g in (gap_starts if gap_starts is not None else ()))
    trial_starts = np.empty(num_trials, dtype=int)
    cursor = 0
    for i in range(num_trials):
        if cursor + trial_len > n_timepoints:
            raise ValueError("BOLD data does not contain enough timepoints for all trials")
        trial_starts[i] = cursor
        cursor += trial_len
        if cursor in gap_offsets:
            cursor += gap_len

    # Gather only the requested trials: result is [voxel, trial, timepoint].
    sample_idx = trial_starts[trial_idx][:, None] + np.arange(trial_len)[None, :]
    selected_BOLD_data_subset = bold_data_selected[:, sample_idx]

    # ---- Variability term: covariance of differences between trials ------
    diff_mat = np.diff(selected_BOLD_data_subset, axis=1)
    diff_mat_flat = diff_mat.reshape(diff_mat.shape[0], -1)
    if diff_mat_flat.shape[1] < 2:
        raise ValueError("Need at least two difference samples (select more trials) to estimate the covariance")
    # atleast_2d: np.cov returns a 0-d array when there is a single voxel.
    L_var = np.atleast_2d(np.cov(diff_mat_flat, bias=False, dtype=np.float32))
    # Enforce exact symmetry and add a small ridge on the diagonal.
    L_var = (L_var + L_var.T) / 2 + 1e-6 * np.eye(L_var.shape[0], dtype=np.float32)

    # ---- Smoothness term: Laplacian of a Gaussian distance kernel --------
    # argwhere walks the grid in C order, matching the row order of bold_data_selected.
    voxel_indices = np.argwhere((~mask_2d).reshape(bold_data.shape[:3]))
    selected_world_coords = nib.affines.apply_affine(anat_img.affine, voxel_indices)
    D = cdist(selected_world_coords, selected_world_coords)
    nonzero = D[D > 0]
    # Kernel width = median non-zero pairwise distance (1.0 if there is none).
    sigma = np.median(nonzero) if nonzero.size else 1.0
    W = np.exp(-D**2 / (2.0 * sigma**2))
    np.fill_diagonal(W, 0.0)
    L_smooth = csgraph.laplacian(W, normed=False)

    selected_BOLD_flat = selected_BOLD_data_subset.reshape(selected_BOLD_data_subset.shape[0], -1)

    return L_task, L_var, L_smooth, selected_BOLD_flat

def objective_func(w, L_task, L_var, L_smooth, alpha_var, alpha_smooth):
    """Evaluate the quadratic loss  w' diag(L_task) w + a_var w' L_var w + a_smooth w' L_smooth w.

    Parameters
    ----------
    w : ndarray
        Voxel weight vector.
    L_task : 1-D ndarray
        Diagonal of the task penalty. Non-finite entries are replaced with
        ``np.nan_to_num``, so a single NaN entry does not turn the whole score
        into NaN.
    L_var, L_smooth : 2-D ndarray
        Variability and smoothness penalty matrices.
    alpha_var, alpha_smooth : float
        Weights of the variability and smoothness terms.

    Returns
    -------
    The value of the quadratic form (a scalar for a 1-D ``w``).

    Raises
    ------
    ValueError
        If ``w`` is None.
    """
    if w is None:
        raise ValueError("w is None; the optimisation did not return a solution")

    task_diag = np.nan_to_num(np.asarray(L_task))
    # diag(d) @ w is an element-wise product, so the dense N x N diagonal
    # matrix is never built. The double transpose makes the broadcast correct
    # for both a 1-D w and an (n, 1) column vector.
    task_term = w.T @ (task_diag * w.T).T
    var_term = w.T @ L_var @ w
    smooth_term = w.T @ L_smooth @ w
    return task_term + alpha_var * var_term + alpha_smooth * smooth_term

def optimize_voxel_weights(L_task, L_var, L_smooth, alpha_var, alpha_smooth):
    """Solve for non-negative voxel weights that sum to one and minimise w' L_total w.

    ``L_total = diag(L_task) + alpha_var * L_var + alpha_smooth * L_smooth``.
    Before solving, non-finite entries are replaced via ``np.nan_to_num``, the
    matrix is symmetrised, and ``1e-8`` is added on the diagonal.

    Parameters
    ----------
    L_task : 1-D ndarray
        Diagonal of the task penalty.
    L_var, L_smooth : 2-D ndarray
        Variability and smoothness penalty matrices.
    alpha_var, alpha_smooth : float
        Weights of the variability and smoothness terms.

    Returns
    -------
    ndarray
        The optimal weight vector.

    Raises
    ------
    RuntimeError
        If the solver finishes without producing a solution.
    """
    L_total = np.diag(L_task) + alpha_var * L_var + alpha_smooth * L_smooth
    n = L_total.shape[0]
    L_total = np.nan_to_num(L_total)
    # Exact symmetry plus a tiny ridge on the diagonal before handing to quad_form.
    L_total = 0.5 * (L_total + L_total.T) + 1e-8 * np.eye(n)

    w = cp.Variable(n, nonneg=True)
    constraints = [cp.sum(w) == 1]

    # Sparse variant kept for reference (alpha_sparse is not a parameter here):
    # objective = cp.Minimize(cp.quad_form(w, L_total) + alpha_sparse * cp.norm1(w))
    objective = cp.Minimize(cp.quad_form(w, L_total))
    problem = cp.Problem(objective, constraints)
    problem.solve(solver=cp.OSQP, verbose=True)

    # Fail loudly here rather than handing None to downstream matrix products.
    if w.value is None:
        raise RuntimeError(f"Voxel-weight optimisation produced no solution (solver status: {problem.status})")
    return w.value


def calculate_weight(param_grid, beta_valume_clean_2d, bold_data, anat_img, mask_2d, trial_len):
    """Grid-search (alpha_var, alpha_smooth) with 2-fold cross-validation over trials.

    For each fold, weights are fitted on the training trials and scored with
    ``objective_func`` on matrices built from the validation trials. The
    combination with the lowest mean validation score wins; ties keep the
    first combination in grid order.

    Parameters
    ----------
    param_grid : dict
        Exactly two entries whose values are iterables of candidates. They are
        read in insertion order as (alpha_var, alpha_smooth).
    beta_valume_clean_2d, bold_data, anat_img, mask_2d, trial_len
        Forwarded unchanged to ``calculate_matrices``.

    Returns
    -------
    (best_params, best_score)
        ``best_params`` is the winning ``(alpha_var, alpha_smooth)`` tuple and
        ``best_score`` its mean validation loss.

    Raises
    ------
    ValueError
        If ``param_grid`` yields no combination.
    RuntimeError
        If the optimiser returns no weights, or no combination has a finite
        mean validation score.
    """
    kf = KFold(n_splits=2, shuffle=True, random_state=0)
    num_trials = beta_valume_clean_2d.shape[-1]

    grid = list(product(*param_grid.values()))
    if not grid:
        raise ValueError("param_grid does not contain any parameter combination")
    scores_per_combo = [[] for _ in grid]

    # The penalty matrices depend only on the trial split, not on the alphas,
    # so build them once per fold and reuse them for every grid point. The
    # seeded KFold yields the same split on every call, so the scores are the
    # same as when the split is regenerated per grid point.
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(np.arange(num_trials)), start=1):
        print(f"k-fold num: {fold_num}")
        L_task_train, L_var_train, L_smooth_train, _ = calculate_matrices(
            beta_valume_clean_2d, bold_data, anat_img, mask_2d, train_idx, trial_len)
        L_task_val, L_var_val, L_smooth_val, _ = calculate_matrices(
            beta_valume_clean_2d, bold_data, anat_img, mask_2d, val_idx, trial_len)

        for combo_scores, (a_var, a_smooth) in zip(scores_per_combo, grid):
            print(f"a_var: {a_var}, a_smooth: {a_smooth}")
            w = optimize_voxel_weights(L_task_train, L_var_train, L_smooth_train,
                                       alpha_var=a_var, alpha_smooth=a_smooth)
            if w is None:
                raise RuntimeError(f"Optimisation returned no weights for a_var={a_var}, a_smooth={a_smooth}")
            combo_scores.append(objective_func(w, L_task_val, L_var_val, L_smooth_val, a_var, a_smooth))
            print(f"fold_scores: {combo_scores}")

    best_score = np.inf
    best_params = None
    for (a_var, a_smooth), combo_scores in zip(grid, scores_per_combo):
        mean_score = np.mean(combo_scores)
        print(f"a_var: {a_var}, a_smooth: {a_smooth}, mean CV loss: {mean_score}")
        # A NaN mean never compares as smaller, so such combinations are skipped.
        if mean_score < best_score:
            best_score = mean_score
            best_params = (a_var, a_smooth)

    if best_params is None:
        raise RuntimeError("No parameter combination produced a finite cross-validation score")

    print("Best parameters:", best_params, "with CV loss:", best_score)
    return best_params, best_score

####################

In [None]:
# Load the filtered beta volume; its last axis is swept below one slice at a time.
beta_volume_filter = np.load("beta_volume_filter.npy")
n_trial = beta_volume_filter.shape[-1]
spatial_shape = beta_volume_filter.shape[:-1]

# Per-voxel NaN bookkeeping: "any" starts False and is OR-ed with each slice,
# "all" starts True and is AND-ed with each slice.
voxels_with_any_nan = np.zeros(spatial_shape, dtype=bool)
voxels_with_all_nan = np.ones(spatial_shape, dtype=bool)
for t in range(n_trial):
    frame_nan = np.isnan(beta_volume_filter[..., t])
    np.logical_or(voxels_with_any_nan, frame_nan, out=voxels_with_any_nan)
    np.logical_and(voxels_with_all_nan, frame_nan, out=voxels_with_all_nan)

print(voxels_with_any_nan.sum(), voxels_with_all_nan.sum())

# Flatten space to one row per voxel and drop the voxels that are NaN in every slice.
beta_volume_filter_2d = beta_volume_filter.reshape(-1, n_trial)
print(beta_volume_filter_2d.shape)
mask_2d = voxels_with_all_nan.ravel()
beta_valume_clean_2d = beta_volume_filter_2d[np.logical_not(mask_2d)]
print(beta_valume_clean_2d.shape)

# Candidate regularisation strengths for the cross-validated grid search.
param_grid = {
    "alpha_var": [0.01, 0.5, 1.0],
    "alpha_smooth": [0.01, 0.5, 1.0],
}
trial_len = 9

# 1) choose the alphas by cross-validation, 2) rebuild the matrices from all
# trials, 3) solve for the final weights with the chosen alphas.
best_params, best_score = calculate_weight(
    param_grid, beta_valume_clean_2d, bold_data, anat_img, mask_2d, trial_len
)
L_task, L_var, L_smooth, selected_BOLD_data = calculate_matrices(
    beta_valume_clean_2d, bold_data, anat_img, mask_2d, None, trial_len
)
weights = optimize_voxel_weights(
    L_task, L_var, L_smooth,
    alpha_var=best_params[0],
    alpha_smooth=best_params[1],
)

# Weighted combination of the selected voxels' BOLD samples.
y = np.matmul(selected_BOLD_data.T, weights)

np.save('weights.npy', weights)
np.save('y.npy', y)

