In [None]:
### SETTINGS ###
project_dir = '/homes_unix/agillig/github_repos/ginna' #specify your path to the github repository

n_perm = 100 # number of permutations per batch; reduced for the sake of the example (publication: 10,000)
n_batches = 1 # number of batches. can be useful to split the work in a cluster environment

compute_all_rsns = False # if True, compute all RSNs; if False, compute only RSN05 as an example

In [3]:
import sys
sys.path.insert(1, project_dir + '/code')

import func_toolbox as ftools
from func_toolbox import fetch_neurosynth_data
from nilearn import image
import null_parcellations

import numpy as np
import os
import pandas as pd
from pathlib import Path
from scipy.stats import zscore
from sklearn.decomposition import PCA


neurosynth_terms_file = project_dir + '/data/terms/BCS_3D.csv'
os.makedirs(Path(neurosynth_terms_file).parent, exist_ok=True)

# download the file from the Pacela et al. 2021 paper repo 
# https://github.com/vale-pak/BCS

if not os.path.exists(neurosynth_terms_file):
    fetch_neurosynth_data(f'{project_dir}/data')

# https://github.com/vale-pak/BCS/blob/main/BCS_3D.csv
df = pd.read_csv(neurosynth_terms_file, sep = ',')

neurosynth_terms = df['Functions']

fcu = ftools.Utilities()
atlas_str = "aicha-aal3"

fcu.set_project_dir(project_dir)
fcu.set_atlas_name(atlas_str)

# create a new directory for the analysis
analysis_dir = project_dir + f'/analysis/mean_RSNs_{atlas_str}'
os.makedirs(analysis_dir, exist_ok=True)

mip_rsn_dir = project_dir + f'/Results/mip/mip_rsn_parcellated/aicha'


# Data processing

## create atlas : aicha + AAL cerebellum / BG

In [4]:
results = {}

In [5]:
parcellation_atlas_file = f'{project_dir}/data/parcellation_atlases/aicha-aal3/parcels_aicha-aal3.nii.gz'

# the atlas is already present in the github repository
if not os.path.exists(parcellation_atlas_file):
        atlases_dir = ''
        file_cortical = atlases_dir + '/AICHA_v2_websiteC/AICHA.nii'
        file_subcortical = atlases_dir + '/AAL3/AAL3v1.nii'

        cortical_img = image.load_img(file_cortical)
        new_data = np.zeros_like(cortical_img.get_fdata(), dtype = 'int32')

        cortical_data = image.load_img(file_cortical).get_fdata()
        subcortical_data = image.load_img(file_subcortical).get_fdata()
        # cortical
        max_cortical = np.max(cortical_data)
        for i, value in enumerate(np.unique(cortical_data)[1:]):
                new_data[cortical_data == value] = i + 1 
        # cerebellum
        # indices : from 95-120 for cerebellum; 121-170 for subcrotcial/brainstem
        for i, value in enumerate([l for l in range(95 , 121)]):
                new_data[subcortical_data == value] = max_cortical + i + 1 

        new_img = image.new_img_like(cortical_img, new_data)
        out_map = parcellation_atlas_file
        os.makedirs(os.path.dirname(out_map), exist_ok=True)
        new_img.to_filename(out_map)

In [6]:
#path to GINNA zstat maps
atlas_dir = f'{project_dir}/atlas/zmaps'
rsn_files = [os.path.join(atlas_dir, f) for f in os.listdir(atlas_dir) if f.endswith('.nii')]
rsn_files.sort()

In [7]:
# # # Create A 4D volume with all 506 maps of the model
# this may take a few min
terms_maps_dir = f'{project_dir}/data/dataset'
terms_maps_files = [os.path.join(terms_maps_dir, f) for f in os.listdir(terms_maps_dir) if f.endswith('.nii.gz')]
terms_maps_files.sort()

out_dir = f'{terms_maps_dir}/concatenated'
os.makedirs(out_dir, exist_ok=True)

out_name = out_dir + '/dataset_concatenated.nii.gz'
if os.path.isfile(out_name) == False:
    image.concat_imgs(terms_maps_files).to_filename(out_name)
concat_ds = image.load_img(out_name)

terms_maps_files_all = terms_maps_files

In [8]:
# Create & retrieve parcellated 506 meta analytic maps
# should also take a few minutes, but needs to be done only once

n_terms = concat_ds.shape[3]

parcellated_dataset_dir = f'{project_dir}/data/dataset/parcellated/{atlas_str}'
os.makedirs(parcellated_dataset_dir, exist_ok=True)

parcellated_dataset_file = os.path.join(parcellated_dataset_dir, f'neurosynthterms_parcellations_{atlas_str}.csv')

parcellated = []

if os.path.isfile(parcellated_dataset_file) == False:
    for t in range(n_terms):
        temp_img = image.index_img(concat_ds, t)
        parcellated.append(fcu.parcellate(temp_img, atlas=parcellation_atlas_file))

    parcellated = np.array(parcellated).squeeze()
    terms = neurosynth_terms.values
    # print(parcellated.shape)
    parcels = [i for i in range(1, parcellated.shape[1] +1)]
    dataset_parcellated = pd.DataFrame(parcellated, index=terms, columns = parcels)
    dataset_parcellated.to_csv(parcellated_dataset_file)
else:
    dataset_parcellated = pd.read_csv(parcellated_dataset_file, sep = ',', index_col = 0)

fcu.dataset_parcellated = dataset_parcellated


## Spatial correlation between RSNs & the Neurosynth maps

In [None]:
# for each rsn, compute the spatial correlation with the 506 meta analytic maps (parcellated)
atlas_img = '/homes_unix/agillig/Atlases/RSN_N41_zNpair_clean1.nii'
parcellation_dir = analysis_dir + '/parcellations'
os.makedirs(parcellation_dir, exist_ok=True)


rsn_str = pd.DataFrame(rsn_files).iloc[:,0].str.split('/').str[-1].str[-6:-4]

if compute_all_rsns == False:
    rsn_str = [rsn_str[4]] #limited to RSN05 to reduce computation time

for i, rsn in enumerate(rsn_str):
    parcellation_file = parcellation_dir + f'/rsn-{rsn}/rsn-{rsn}_unique_parcellated.csv'
    os.makedirs(os.path.dirname(parcellation_file), exist_ok=True)

    index = int(rsn) - 1
    # tmp_data = image.index_img(atlas_img, index)
    # affine = tmp_img.affine
    if os.path.isfile(parcellation_file) == False:
        tmp_img = image.load_img(rsn_files[index])
        tmp_parcellatd = fcu.parcellate(tmp_img, atlas=parcellation_atlas_file)
        np.savetxt(parcellation_file, tmp_parcellatd, delimiter = ',')
    data = np.loadtxt(parcellation_file, delimiter = ',')


    corr_temp = [np.corrcoef(data, dataset_parcellated.iloc[j,:])[1,0] for j in range(n_terms)]
    
    results[rsn] = {'term': neurosynth_terms, 'spatial_correlation': corr_temp}

# Non parametric statistics

In [17]:
# null_projections = {}
# use brainsmash to generate surrogate data that preserves spatial autocorrelation (Burt, 2020)
# https://github.com/murraylab/brainsmash: cf null_parcellations.py

# once computed, for each rsn, compute the correlation of the null parcellations with the 506 meta analytic maps (parcellated)

for rsn in rsn_str:
    null_parcellations.generate_null(int(rsn), project_dir, n_perm=n_perm, n_batches=n_batches)
    null_distr = fcu.compute_correlation_null(rsn=rsn, n_perm=n_perm, n_batches=n_batches, overwrite=True) #set overwrite to False if you have already computed the null distributions

    p, pcor = fcu.compute_pvalues(results[rsn]['spatial_correlation'], null_distr)

    results[rsn]['p'] = p
    results[rsn]['pcor'] = pcor
    results[rsn]['is_significant'] = (results[rsn]['pcor'] < 0.05).tolist()


processing rsn 05
batch 1
generating 100 surrogates
elapsed time: 24.85 s
saving surrogate maps to /homes_unix/agillig/github_repos/ginna/analysis/mean_RSNs_aicha-aal3/null_parcellations/rsn-05/rsn-05_null_parcellations_batch-01_of_1.csv
processing rsn 05
saving file


# Results summary for RSN05

In [19]:
res_table = pd.DataFrame.from_dict(results['05']).sort_values(by='spatial_correlation', ascending=False)
res_table

Unnamed: 0,term,spatial_correlation,p,pcor,is_significant
258,money,0.556019,0.009901,0.009901,True
321,preferences,0.497313,0.009901,0.009901,True
84,decision_making,0.471232,0.009901,0.009901,True
49,choice,0.454076,0.009901,0.019802,True
468,value,0.451444,0.009901,0.019802,True
...,...,...,...,...,...
482,visual,-0.116171,0.732673,1.000000,False
267,motor_imagery,-0.118895,0.772277,1.000000,False
194,imagery,-0.122966,0.811881,1.000000,False
273,movements,-0.125567,0.732673,1.000000,False


# Complementary: Principal component analysis

In [38]:
# z-score threshold for the RSN mask
thr = 1

for i, rsn in enumerate(rsn_str):

    rsn_results = results[rsn]

    n_components = 10
    print(f'rsn {rsn}')
    # terms_maps_files[pvals_all_uncor[rsn_str.index(rsn)] < 0.05]
    # uncorrected
    
    n_pca_components = np.min((n_components,len(np.nonzero(rsn_results['is_significant'])[0])))

    pca = PCA(n_components=n_pca_components, 
              svd_solver='full')

    # add masking of data with rsn map before computing pca
    rsn_img = image.load_img(rsn_files[i])

    rsn_data = fcu.parcellate(rsn_img, atlas=parcellation_atlas_file).squeeze()


    # create a binary mask of the RSN map
    rsn_mask = np.array([1 if i > thr else 0 for i in rsn_data])
    rsn_regions = np.where(rsn_mask == 1)[0]


    rsn_terms_indices = np.where(rsn_results['is_significant'])[0]
    rsn_terms = neurosynth_terms.iloc[rsn_terms_indices]
    # print(f'significant terms for rsn {rsn}: {rsn_terms.values}')
    # skip if less than 3 terms are significant
    if len(rsn_terms_indices) < 3:
        print('less than 3 terms are significant, skipping')
        continue


    input_rsn_data = dataset_parcellated.iloc[rsn_terms_indices, rsn_regions].T

    input_rsn_data = zscore(input_rsn_data)
    x_new = pca.fit_transform(input_rsn_data)

    print(f'explained variance: {pca.explained_variance_ratio_}')
    rsn_results['pca'] = pca

    pca_results = pd.DataFrame(pca.components_, 
                               index=[f'PC{c:02d}' for c in range(1, pca.components_.shape[0] + 1)], 
                               columns = rsn_terms.values).T

    # loadings can be sorted in the following manner
    print(f'ordered loadings for PC01:')
    print(pca_results.sort_values(by='PC01', ascending=False))


rsn 05
explained variance: [0.28079161 0.25684348 0.19920988 0.14174494 0.1214101 ]
ordered loadings for PC01:
                     PC01      PC02      PC03      PC04      PC05
preferences      0.557958 -0.151097 -0.493970 -0.582350  0.287603
decision_making  0.508642  0.322498  0.525055 -0.299288 -0.521558
value            0.507006 -0.439341 -0.221607  0.613673 -0.352450
choice           0.380679  0.616471  0.029275  0.441104  0.528789
money            0.167317 -0.547822  0.656007 -0.011341  0.491349
