Generate Connectome from Niftis
====

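This notebook converts 4D NIfTI files into a connectome set for use with connectome_quick: for each subject it writes one `.npy` file of masked time series and one `_norms.npy` file of the corresponding norms.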

In [None]:
import os 
import glob
import multiprocessing
import numpy as np
from nilearn import image, maskers
from tqdm import tqdm 
from natsort import natsorted
from nimlab import datasets


# Don't be a pest when multiprocessing: raise this process's niceness by 19
# so the conversion yields the CPU to everything else on the machine.
# os.nice only exists on Unix, so skip it on other platforms (e.g. Windows)
# rather than failing the whole setup cell with an AttributeError.
if hasattr(os, "nice"):
    os.nice(19)

# Turns one subject's nifti runs into the pair of npy files (time series + norms) used by connectome_quick
# args is a tuple in the form (files[], subject_name, mask_file, output_dir)
def transform(args):
    """Concatenate, mask and save one subject's runs as float16 numpy arrays.

    The four inputs are packed into a single tuple so the function can be
    handed directly to ``multiprocessing.Pool.imap``.

    Parameters
    ----------
    args : tuple
        ``(files, subject_name, mask_file, output_dir)`` where ``files`` is the
        list of nifti images for one subject, ``subject_name`` is used as the
        output file stem, ``mask_file`` is the mask passed to ``NiftiMasker``
        and ``output_dir`` is the directory the arrays are written to.

    Returns
    -------
    None
        Two files are written via ``np.save`` (which appends ``.npy``):
        ``<output_dir>/<subject_name>`` with the masked data and
        ``<output_dir>/<subject_name>_norms`` with its norms along axis 0.
    """
    run_files, subject, mask_path, out_dir = args[0], args[1], args[2], args[3]

    # All runs of the subject become one image before masking.
    concatenated = image.concat_imgs(run_files)

    # standardize=False keeps the signal values as they are in the images.
    timeseries = maskers.NiftiMasker(mask_path, standardize=False).fit_transform(concatenated)

    # One norm per column of the masked array (nilearn documents the layout
    # as (timepoints, voxels), so this is presumably one norm per voxel).
    column_norms = np.linalg.norm(timeseries, axis=0)

    # Both arrays are stored as float16 to keep the files small.
    data_path = os.path.join(out_dir, subject)
    np.save(data_path, timeseries.astype('float16'))
    norms_path = os.path.join(out_dir, subject + '_norms')
    np.save(norms_path, column_norms.astype('float16'))

## 1. Gather the files you want to convert from nifti to numpy arrays and specify where you want the output to go

In [None]:
# Get the files to be transformed, you may need wildcards, two examples provided:
# files = natsorted(glob.glob("/lab-share/Neuro-Cohen-e2/Public/projects/GSP/CBIG_fMRI_preprocess_Legacy_GSP_500M/sub*/vol/sub*gz"))
# files = natsorted(glob.glob("/data/nimlab/Yeo_1000_nii/vol/*"))

# Edit these two paths for your dataset: where the niftis live and where the npy files should go.
input_dir = '/Volumes/Drobo/Research/collections/GSPtest'
output_dir = '/Volumes/Drobo/Research/connectomes/GSPtest'

# All NIMLAB analyses have used the FSL 2mm_brain_mask_dil file since shifting to python code (older analyses used the 222.nii.gz mask as in Lead-DBS)
mask_img = datasets.get_img_path("MNI152_T1_2mm_brain_mask_dil")

# natsorted keeps runs in natural order (e.g. bld2 before bld10).
files = natsorted(glob.glob(os.path.join(input_dir,"sub*/vol/sub*.nii.gz")))
print(files[:5])

# An empty match would otherwise pass silently through every later cell and produce no output.
if not files:
    print(f"WARNING: no files matched under {input_dir} - check input_dir and the wildcard pattern")

# exist_ok=True creates the directory only when needed, without a separate
# exists() check that another process could invalidate in between.
os.makedirs(output_dir, exist_ok=True)

## 2. Generate a list of unique subject names (which we will then use to combine runs within subjects)

In [None]:
# Get unique subject names from dataset. This cell requires the user to change it based on the naming scheme of the files
# For instance, for files named: sub-0003_bld002_rest_skip4_stc_mc_bp_0.0001_0.08_resid_FS1mm_MNI1mm_MNI2mm_sm7_finalmask.nii.gz
# os.path.basename(f).split('_bld')[0]   will split the file name using '_bld' and take what's before it to be the subject name

# 'subjects' has one entry per file, in the same order as 'files'.
# os.path.basename is used instead of splitting on '/' so the rule does not depend on the path separator.
subjects = [os.path.basename(f).split('_bld')[0] for f in files]

# A bare set has no defined order, so sort it to get the same subject order on every run.
unique_subjects = natsorted(set(subjects))
print(len(unique_subjects))

## 3. Make a list of runs for each subject and set up the arguments for conversion

In [None]:
# The transform function expects lists of runs that belong to the same subject so that they can be concatenated.
# Runs are grouped by pairing each entry of 'files' with the subject name derived for it in step 2
# ('subjects' is in the same order as 'files'). Matching on the exact name avoids the pitfall of a
# wildcard such as glob(s+'*'), where 'sub-1*' would also pick up the runs of 'sub-10', and it keeps
# the runs limited to the files selected in step 1. 's' is the subject name.

runs_by_subject = {}
for f, sub in zip(files, subjects):
    runs_by_subject.setdefault(sub, []).append(f)

subject_args = []
for s in unique_subjects:
    runs = natsorted(runs_by_subject[s])
    subject_args.append((runs, s, mask_img, output_dir))

# show the results for the first five subjects
print(subject_args[:5])

# make sure we got everyone: one entry per subject, and every file assigned to exactly one subject
number_of_subjects = len(subject_args)
print(number_of_subjects)
assert sum(len(a[0]) for a in subject_args) == len(files), "some files were not assigned to a subject"

## 4. Run the conversion serially or in parallel, depending on your computer (keep `processes` <= the number of cores you have access to)

In [None]:
# Convert it!

## Serial: one subject at a time (no dependency on multiprocessing.Pool)

for subject_arg in tqdm(subject_args):
    transform(subject_arg)


## Parallel: several subjects at once (as long as your python environment is set up correctly)
## To use it, comment out the serial loop above and uncomment the lines below.

# pool = multiprocessing.Pool(processes=6)
# list(tqdm(pool.imap(transform, subject_args), total=number_of_subjects))
# pool.close()
# pool.join()