Take the netstack and linkage computation out of the rest of the pipeline and instead serialize the results to disk, so that later steps can load them rather than recompute them (for speed reasons).

In [67]:
# Imports
import os
import sys
import glob
import numpy as np
import pandas as pd
import nibabel as nib
import brainbox as bb

In [68]:
# Dummy variables
# Candidate phenotype tables; only one of them is selected further down
pheno_path_0 = '/data1/abide/Pheno/existing_pheno.csv'
pheno_path_1 = '/data1/abide/Pheno/unconstrained_3box_189_sample.csv'
pheno_path_2 = '/data1/abide/Pheno/unconstrained_2box_308_sample.csv'
pheno_path_3 = '/data1/abide/Pheno/nyu_pheno.csv'
# Available scales; sc_id is the index into this list
scales = [7, 12, 20, 36, 64]
# NOTE(review): warn is not referenced in the cells visible here
warn = False
sc_id = 0
# Variables
# scale is also used below as the size of the first axis of the stacks
scale = scales[sc_id]
# Sub-directory of data_path that holds the maps
mtp = 'stability_maps'
# Label that ends up in every output file name
name = 'full_sample'
# Input paths
# Glob pattern for one subject's map; the placeholder takes the zero-padded SUB_ID
tpl = '*_fmri_{:07d}_session_1_run1_stability_maps.nii.gz'
prior_path = '/data1/cambridge/template/template_cambridge_basc_multiscale_sym_scale{:03d}.nii.gz'.format(scale)
data_path = '/data1/abide/Out/Scores/sc{:02d}/time'.format(scale)
# Phenotype table actually used by the rest of the notebook
pheno_path = pheno_path_0
mask_path = '/data1/abide/Mask/mask_data_specific.nii.gz'

# Output paths
out_path = '/data1/subtypes/serial_preps'
# File names carry no extension; they are handed to np.save further down,
# which appends '.npy' when the name does not already end with it
net_raw_name = 'netstack_raw_{}_scale_{:03d}'.format(name, scale)
net_demean_name = 'netstack_demeaned_{}_scale_{:03d}'.format(name, scale)
sim_mat_name = 'correlation_matrix_{}_scale_{:03d}'.format(name, scale)
net_raw_path = os.path.join(out_path, net_raw_name)
net_demean_path = os.path.join(out_path, net_demean_name)
sim_mat_path = os.path.join(out_path, sim_mat_name)
# See if output path exists
# Create it (including missing parents) so the saves below have somewhere to go
if not os.path.isdir(out_path):
    print('Creating new output path {}'.format(out_path))
    os.makedirs(out_path)

In [69]:
def dump_things(obj, path):
    """
    Pickle an object into a gzip-compressed file.

    Parameters
    ----------
    obj : object
        Anything picklable.
    path : str
        Destination file; it is created or overwritten.

    Returns
    -------
    int
        Always 0.
    """
    # The notebook's import cell brings in neither gzip nor pickle, so
    # without these local imports every call fails with a NameError.
    import gzip
    import pickle

    with gzip.open(path, 'wb') as outfile:
        # Protocol 2 is kept from the original so existing readers still work
        pickle.dump(obj, outfile, 2)
    print('\nDone serializing object @ {}'.format(path))
    return 0

In [70]:
def alt_dump(obj, path):
    """
    Write obj to path as a NumPy .npz archive via np.savez.

    obj is passed positionally, so it is stored under the default
    key 'arr_0'. Always returns 0.
    """
    status = 0
    np.savez(path, obj)
    return status

In [71]:
# Load the mask image and turn it into a boolean array:
# every voxel with a non-zero value counts as inside the mask
m_img = nib.load(mask_path)
mask_data = m_img.get_data()
mask = np.not_equal(mask_data, 0)

In [72]:
# Get the phenotype
pheno = pd.read_csv(pheno_path)
# Grab the corresponding file paths, one per phenotype row and in row order.
# SUB_ID is read straight from the column: iterrows() does not preserve
# dtypes and can hand back floats, which the '{:07d}' template rejects.
path_list = []
missing = []
for sub in pheno['SUB_ID']:
    pattern = tpl.format(sub)
    hits = glob.glob(os.path.join(data_path, mtp, pattern))
    if hits:
        # Same choice as before: the first match returned by glob
        path_list.append(hits[0])
    else:
        missing.append(pattern)
# Fail once, naming every absent map, instead of an opaque IndexError on
# the first missing subject
if missing:
    raise IOError('No file found for: {}'.format(', '.join(missing)))

In [73]:
# Print the expected file pattern of every phenotype row that has no
# matching map under data_path/mtp
for i, r in pheno.iterrows():
    wanted = tpl.format(r['SUB_ID'])
    matches = glob.glob(os.path.join(data_path, mtp, wanted))
    if len(matches) == 0:
        print('{} not here'.format(wanted))

In [74]:
# Get the prior
# prior_path is the Cambridge multiscale template for the selected scale
p_img = nib.load(prior_path)
# Voxel array of the template
# NOTE(review): prior is not referenced again in the cells visible here
prior = p_img.get_data()

In [75]:
# Problem size: one entry per subject file, one per in-mask voxel
n_sub = len(path_list)
n_vox = mask.sum()

In [None]:
# Storage
# netstack_raw is (network, voxel, subject); sim_mat is (network, subject,
# subject) and is filled by the correlation cell further down.
# netstack_demean is no longer pre-allocated here: it is rebound to a fresh
# array below, so a zeros array of the same size would only cost memory.
netstack_raw = np.zeros((scale, n_vox, n_sub))
sim_mat = np.zeros((scale, n_sub, n_sub))

# Get the scores maps into a voxel by subject array
# NOTE(review): net_stack is not used in the cells visible here; kept in
# case a later cell relies on it
net_stack = np.zeros((n_vox, n_sub))
# Iterate files
timer = bb.tools.Counter(n_sub)
for sub_id, s_path in enumerate(path_list):
    timer.tic()
    # Getting data: keep only in-mask voxels, then transpose so the result
    # fits the (scale, n_vox) slice for this subject
    netstack_raw[..., sub_id] = nib.load(s_path).get_data()[mask].T
    timer.toc()
    timer.progress()

# Save the netstack
np.save(net_raw_path, netstack_raw)

# Take the grand average (mean over the subject axis)
grand_average = np.mean(netstack_raw, 2)
# Demean the netstack. Broadcasting the (scale, n_vox, 1) average over the
# subject axis gives the same values as tiling it n_sub times, without
# building a second full-size array.
netstack_demean = netstack_raw - grand_average[..., None]

# Save the demeaned netstack
np.save(net_demean_path, netstack_demean)

 100.0 % done 0.00 seconds to go. One step takes 0.04430 and we ran for 36.88 s so far

In [None]:
# One subject-by-subject correlation matrix per network
timer = bb.tools.Counter(scale)
for net_id in range(scale):
    timer.tic()
    # Columns are the variables here (rowvar is false), so the voxel by
    # subject slice yields a subject by subject matrix
    corr_mat = np.corrcoef(netstack_demean[net_id], rowvar=False)
    # Drop it into the slot for this network
    sim_mat[net_id] = corr_mat
    timer.toc()
    timer.progress()

# Write the full stack of correlation matrices to disk
np.save(sim_mat_path, sim_mat)

In [78]:
# Display the stack of correlation matrices as the cell output
sim_mat

array([[[  1.00000000e+00,   1.75577320e-01,   6.35187080e-02, ...,
          -3.99486365e-02,  -1.06050022e-01,  -5.20797828e-03],
        [  1.75577320e-01,   1.00000000e+00,   1.35157567e-01, ...,
          -7.01421384e-02,  -1.18049455e-01,   3.74213284e-03],
        [  6.35187080e-02,   1.35157567e-01,   1.00000000e+00, ...,
           7.75374145e-03,   1.11153640e-01,  -2.77059065e-02],
        ..., 
        [ -3.99486365e-02,  -7.01421384e-02,   7.75374145e-03, ...,
           1.00000000e+00,   2.92764912e-02,  -1.68677292e-02],
        [ -1.06050022e-01,  -1.18049455e-01,   1.11153640e-01, ...,
           2.92764912e-02,   1.00000000e+00,   1.05671242e-02],
        [ -5.20797828e-03,   3.74213284e-03,  -2.77059065e-02, ...,
          -1.68677292e-02,   1.05671242e-02,   1.00000000e+00]],

       [[  1.00000000e+00,   1.15943465e-01,   7.23312425e-02, ...,
           1.32874698e-02,   4.29672359e-02,   2.69384684e-02],
        [  1.15943465e-01,   1.00000000e+00,   1.33074014e-0