In [1]:
# Shell escape: print the current date/time so the saved output records when this was run.
!date

Tue Apr  8 03:58:42 PM EDT 2025


In [2]:
# Show the working directory; the relative '../Data/...' paths used below resolve against it.
pwd

'/projects/sccn/andromeda1/aglinska/BC-ABCD-denoise/Code'

In [3]:
import os
import numpy as np
import pandas as pd
import ants
from matplotlib import pyplot as plt
import pickle
from tqdm import tqdm

In [4]:
def load_pickle(fn):
    """Load and return the object stored in the pickle file ``fn``.

    Parameters
    ----------
    fn : str or path-like
        Path to the pickle file.

    Returns
    -------
    object
        The unpickled object.

    Raises
    ------
    FileNotFoundError
        If ``fn`` does not exist. (Previously a missing file fell through to
        the ``return`` statement and surfaced as an UnboundLocalError.)
    """
    if not os.path.exists(fn):
        raise FileNotFoundError(f'pickle file not found: {fn}')
    # NOTE: pickle.load can execute arbitrary code; only use this on trusted files.
    with open(fn, 'rb') as file:
        return pickle.load(file)

In [34]:
def correlate_columns(arr1, arr2):
    """
    Compute the Pearson correlation between corresponding columns of two matrices.

    Rows are observations and columns are variables; the correlation is taken
    along axis 0. The inputs are combined with ordinary NumPy broadcasting, so
    ``arr2`` may also be a single column of shape (n_rows, 1) that is correlated
    against every column of ``arr1``.

    Parameters:
    arr1 (array-like): First matrix of shape (n_rows, n_cols)
    arr2 (array-like): Second matrix, broadcastable against ``arr1``

    Returns:
    np.ndarray: 1D array with one correlation per column (size n_cols).
        Columns where either input has zero variance yield NaN.
    """
    # Ensure input arrays are numpy arrays
    arr1 = np.asarray(arr1)
    arr2 = np.asarray(arr2)

    # Subtract the mean of each column (center)
    arr1_centered = arr1 - np.mean(arr1, axis=0)
    arr2_centered = arr2 - np.mean(arr2, axis=0)

    # Numerator: sum of cross-products (unnormalized covariance)
    numerator = np.sum(arr1_centered * arr2_centered, axis=0)

    # Denominator: product of the root sums of squares
    denominator = np.sqrt(np.sum(arr1_centered**2, axis=0) * np.sum(arr2_centered**2, axis=0))

    # A zero-variance column gives 0/0. The NaN result is intentional, so silence
    # the divide/invalid RuntimeWarnings instead of letting them leak to the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        correlation = numerator / denominator

    return correlation

In [110]:
def get_regs(events_fn, n_scans=None, t_r=2.0):
    """Build HRF-convolved 'face' and 'place' regressors from an events file.

    The events table is turned into a first-level design matrix (SPM HRF,
    3rd-order polynomial drift) and selected condition columns are summed.

    Parameters
    ----------
    events_fn : str
        Path to a tab-separated events file. It must yield the design-matrix
        columns 'face', 'body', 'house' and 'scene'
        (presumably via a `trial_type` column -- TODO confirm against the data).
    n_scans : int, optional
        Number of volumes in the run. If omitted, falls back to the
        notebook-level ``epi`` image (``epi.shape[-1]``), which is what this
        function always did before the parameter existed.
    t_r : float, optional
        Repetition time used to space the frame times (default 2.0).

    Returns
    -------
    face_reg : np.ndarray
        Sum of the 'face' and 'body' design-matrix columns, one value per scan.
    place_reg : np.ndarray
        Sum of the 'house' and 'scene' design-matrix columns, one value per scan.
    """
    from nilearn.glm.first_level import make_first_level_design_matrix

    events = pd.read_csv(events_fn, delimiter='\t')

    if n_scans is None:
        # Backward-compatible fallback: rely on the `epi` image defined at
        # notebook level. Prefer passing n_scans explicitly to avoid hidden state.
        n_scans = epi.shape[-1]
    frame_times = np.arange(n_scans) * t_r

    X1 = make_first_level_design_matrix(
        frame_times,
        events,
        drift_model="polynomial",
        drift_order=3,
        hrf_model="SPM",
    )

    # Note the grouping: "face" includes bodies, "place" is houses + scenes.
    face_reg = X1[['face', 'body']].values.sum(axis=1)
    place_reg = X1[['house', 'scene']].values.sum(axis=1)

    return face_reg, place_reg

In [111]:
def map_corr2reg(epi,roi,reg,ofn):
    """Write a voxel-wise map of the correlation between ``epi`` and ``reg``.

    Only voxels where ``roi == 1`` and whose time series has a standard
    deviation above 1e-3 are correlated; every other voxel is left at 0.

    Parameters
    ----------
    epi : image object
        Image whose last axis is time; must provide ``.shape`` and ``.numpy()``
        (in this notebook, an image returned by ``ants.image_read``).
    roi : image object
        Mask on the same spatial grid as ``epi``; must provide ``.flatten()``,
        ``.shape`` and ``.new_image_like()``.
    reg : array-like
        1D regressor with one value per time point of ``epi``.
    ofn : str
        Output filename for the correlation map.

    Raises
    ------
    ValueError
        If ``reg`` does not have one value per time point, or the mask does not
        have one value per voxel of ``epi``.
    """
    # Use the image's own time dimension instead of the notebook-level `n_scans`
    # global, which may describe a different image than the one passed in.
    n_timepoints = epi.shape[-1]

    reg = np.asarray(reg)
    if reg.shape[0] != n_timepoints:
        raise ValueError(f'regressor has {reg.shape[0]} samples but image has {n_timepoints} time points')

    epi_flat = epi.numpy().reshape(-1, n_timepoints)  # (voxels, time)
    gm_idx = roi.flatten() == 1
    if gm_idx.shape[0] != epi_flat.shape[0]:
        raise ValueError(f'mask has {gm_idx.shape[0]} voxels but image has {epi_flat.shape[0]}')

    # Skip (near-)constant voxels: their z-score and correlation are undefined.
    std1_idx = epi_flat.std(axis=1) > 1e-3
    use_idx = gm_idx & std1_idx

    # Z-score each retained voxel's time series.
    voxel_array = epi_flat[use_idx, :]
    voxel_array = (voxel_array - voxel_array.mean(axis=1)[:, np.newaxis]) / voxel_array.std(axis=1)[:, np.newaxis]

    # The regressor is passed as a single (time, 1) column; correlate_columns
    # broadcasts it against every voxel column, so no repeated copy is needed.
    corr_vals = correlate_columns(voxel_array.transpose(), reg[:, np.newaxis])

    # Scatter the correlations back into a full-volume array (0 elsewhere).
    corr_arr = np.zeros(epi_flat.shape[0])
    corr_arr[use_idx] = corr_vals
    corr_nii = roi.new_image_like(corr_arr.reshape(roi.shape))
    corr_nii.to_filename(ofn)

In [112]:
# Directory whose entries are scanned for subject names.
indir = '../Data/StudyForrest/fmriprep/'

# Keep entries that start with 'sub' and are not '.html' files.
# NOTE(review): os.listdir returns entries in arbitrary order, and the position in
# `subs` is later used as the subject index -- confirm this ordering is intended.
subs = [entry for entry in os.listdir(indir) if entry.startswith('sub') and not entry.endswith('.html')]
n = len(subs)
n

14

In [7]:
# Path templates; fill in with .format(sub=<subject name>, r=<run number>).
epi_fn_temp = (
    '../Data/StudyForrest/fmriprep/{sub}/ses-localizer/func/'
    '{sub}_ses-localizer_task-objectcategories_run-{r}'
    '_bold_space-MNI152NLin2009cAsym_preproc.nii.gz'
)
compcor_fn_temp = (
    '../Data/StudyForrest/CompCor-maps-forrest/'
    '{sub}-COMPCOR-objectcategories_run-{r}.nii.gz'
)
events_fn_temp = (
    '../Data/StudyForrest/events/'
    '{sub}_ses-localizer_task-objectcategories_run-{r}_events.tsv'
)

In [114]:
# Compute face/place correlation maps for every subject and run, once on the
# preprocessed EPI and once on the CompCor image.
out_dir = '../Data/StudyForrest/DeepCor-baselines-2/'
gm_fn = '../Data/StudyForrest/fmriprep/mask_roi.nii'

# Make sure the output directory exists before anything is written into it.
os.makedirs(out_dir, exist_ok=True)

# The ROI mask is the same file for every subject/run, so read it once.
roi = ants.image_read(gm_fn)

# Iterate over the subjects actually found instead of a hard-coded count;
# `s` is the position in `subs` and is what ends up in the output filenames.
for s, sub in enumerate(tqdm(subs)):
    for r in [1,2,3,4]:
        epi_fn = epi_fn_temp.format(sub=sub,r=r)
        compcor_fn = compcor_fn_temp.format(sub=sub,r=r)
        events_fn = events_fn_temp.format(sub=sub,r=r)

        missing = [fn for fn in (epi_fn, compcor_fn, events_fn) if not os.path.exists(fn)]
        assert not missing, f'missing files: {missing}'

        epi = ants.image_read(epi_fn)
        compcor = ants.image_read(compcor_fn)

        # Kept as notebook-level names: the helper functions defined above may
        # read `epi` / `n_scans` from the global namespace.
        n_scans = epi.shape[-1]

        # Both images are correlated against regressors built for n_scans
        # volumes, so a length mismatch would give misaligned results.
        assert compcor.shape[-1] == n_scans, (
            f'{compcor_fn} has {compcor.shape[-1]} volumes, expected {n_scans}')

        face_reg,place_reg = get_regs(events_fn)

        # Same order as before: preproc face, preproc place, compcor face, compcor place.
        for prefix, img in (('preproc', epi), ('compcor', compcor)):
            for cond, reg in (('face', face_reg), ('place', place_reg)):
                map_corr2reg(img, roi, reg, os.path.join(out_dir, f'{prefix}-corr2{cond}_S{s}_R{r}.nii.gz'))

100%|██████████| 14/14 [03:30<00:00, 15.02s/it]


In [138]:
# Average the preproc correlation maps: first across the four runs of each
# subject, then across subjects (grand average), for each condition.
out_dir = '../Data/StudyForrest/DeepCor-baselines-2/'
n_subs = len(subs)  # subject count comes from the listing, not a magic number

for cond in ['face', 'place']:
    stem = f'preproc-corr2{cond}'

    # Per-subject mean over runs.
    for s in tqdm(range(n_subs)):
        image_files = [ants.image_read(os.path.join(out_dir, f'{stem}_S{s}_R{r}.nii.gz')) for r in [1,2,3,4]]
        image_avg = image_files[0].new_image_like(np.array([image_file.numpy() for image_file in image_files]).mean(axis=0))
        image_avg.to_filename(os.path.join(out_dir, f'{stem}_S{s}_R_avg.nii.gz'))

    # Grand mean over the per-subject averages just written.
    image_files = [ants.image_read(os.path.join(out_dir, f'{stem}_S{s}_R_avg.nii.gz')) for s in range(n_subs)]
    image_avg = np.array([image_file.numpy() for image_file in image_files]).mean(axis=0)
    image_files[0].new_image_like(image_avg).to_filename(os.path.join(out_dir, f'{stem}_grand_average.nii.gz'))

100%|██████████| 14/14 [00:00<00:00, 27.91it/s]
100%|██████████| 14/14 [00:00<00:00, 28.15it/s]


In [129]:
# Average the CompCor correlation maps: first across the four runs of each
# subject, then across subjects (grand average), for each condition.
out_dir = '../Data/StudyForrest/DeepCor-baselines-2/'
n_subs = len(subs)  # subject count comes from the listing, not a magic number

for cond in ['face', 'place']:
    stem = f'compcor-corr2{cond}'

    # Per-subject mean over runs.
    for s in tqdm(range(n_subs)):
        image_files = [ants.image_read(os.path.join(out_dir, f'{stem}_S{s}_R{r}.nii.gz')) for r in [1,2,3,4]]
        image_avg = image_files[0].new_image_like(np.array([image_file.numpy() for image_file in image_files]).mean(axis=0))
        image_avg.to_filename(os.path.join(out_dir, f'{stem}_S{s}_R_avg.nii.gz'))

    # Grand mean over the per-subject averages just written.
    image_files = [ants.image_read(os.path.join(out_dir, f'{stem}_S{s}_R_avg.nii.gz')) for s in range(n_subs)]
    image_avg = np.array([image_file.numpy() for image_file in image_files]).mean(axis=0)
    image_files[0].new_image_like(image_avg).to_filename(os.path.join(out_dir, f'{stem}_grand_average.nii.gz'))

100%|██████████| 14/14 [00:00<00:00, 28.29it/s]
