# This downloads the data and templates, builds the connectomes and saves them to disk

In [1]:
import os
import re
import sys
import glob
import time
import gzip
import cPickle
import numpy as np
import nibabel as nib
import subprocess as sp
from scipy import io as si

In [12]:
# Helper that runs a shell command and waits until it has finished
def run_comm(command, verbose=False, wget=False):
    """Run a shell command to completion and return its exit code.

    The command is executed through the shell with stderr merged into
    stdout. The call always blocks until the command has finished, so that
    commands issued one after the other really run one after the other.

    Parameters
    ----------
    command : str
        The command line handed to the shell. It is passed on unescaped,
        so only use it with trusted, locally built strings.
    verbose : bool
        If True, every line of output is echoed to stdout as it arrives and
        'stop!' is printed once the output has ended.
    wget : bool
        Only used when verbose is True. Lines that contain both a size
        (digits followed by a capital letter) and a percentage are
        shortened to '<size> <percentage and rest of the line>'.

    Returns
    -------
    int
        The exit code of the command.
    """
    # universal_newlines makes the pipe deliver text instead of bytes, so
    # the regular expressions below work on Python 2 and Python 3 alike
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.STDOUT,
                 shell=True, universal_newlines=True)

    if not verbose:
        # Drain the pipe and wait. Without this the command would keep
        # running in the background (or block on a full pipe) while the
        # caller already starts the next step.
        p.communicate()
        return p.returncode

    # Talk a little bit about what's going on and give a progress report
    for line in iter(p.stdout.readline, ''):
        message = line
        if wget:
            size = re.search('[0-9]+[A-Z]', line)
            prog = re.search('[0-9]+%.*', line)
            if prog and size:
                message = size.group() + ' ' + prog.group()
        sys.stdout.write('\r {}'.format(message))
        sys.stdout.flush()
    print('stop!')
    p.stdout.close()
    # The output has ended, now collect the exit code
    return p.wait()

In [3]:
# Root of the working tree. All files will end up somewhere below it
work_path = '/data1/test/connectome'
# The 'mask' and 'data' sub-folders of the work path
mask_dir, data_dir = (os.path.join(work_path, folder)
                      for folder in ('mask', 'data'))

# See if we need to download anything
This can take quite a long time.

In [4]:
# Fetch the templates only when the mask folder does not exist yet
if os.path.isdir(mask_dir):
    print('The mask folder is already there, I assume that all the files are inside')
else:
    os.makedirs(mask_dir)
    print('I am downloading the template files form figshare')
    # Path of the downloaded archive and of the folder whose content is
    # moved up into the mask folder
    zip_file = os.path.join(mask_dir, 'template_cambridge_basc_multiscale_nii_sym.zip')
    unzipped = os.path.join(mask_dir, 'template_cambridge_basc_multiscale_nii_sym')
    # Download, unpack, move the files up, then remove the leftovers
    steps = (
        'wget http://files.figshare.com/1861819/template_cambridge_basc_multiscale_nii_sym.zip -P {}'.format(mask_dir),
        'unzip {} -d {}'.format(zip_file, mask_dir),
        'mv {} {}'.format(os.path.join(unzipped, '*'), mask_dir),
        'rm -rf {}'.format(unzipped),
        'rm {}'.format(zip_file),
    )
    for step in steps:
        run_comm(step)
    
# Fetch the data only when the data folder does not exist yet
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)
    print('I am downloading the data form figshare')
    # wget stores the download under the last part of the URL
    comm1 = 'wget http://downloads.figshare.com/article/public/1160600 -P {}'.format(data_dir)
    run_comm(comm1, True, True)
    comm2 = 'unzip {} -d {}'.format(os.path.join(data_dir, '1160600'), os.path.join(data_dir, 'tmp'))
    run_comm(comm2, True)
    # NOTE(review): the archive is unpacked into 'tmp' but the files are
    # moved out of 'out' - confirm against the layout of the archive
    comm3 = 'mv {}/* {} -v'.format(os.path.join(data_dir, 'out'), data_dir)
    run_comm(comm3)
    # Remove the downloaded archive and the temporary unpack folder
    comm4 = 'rm -rfv {} {}'.format(os.path.join(data_dir, '1160600'), os.path.join(data_dir, 'tmp'))
    run_comm(comm4, True)
else:
    print('The data folder is already there, I assume that all the files are inside')

The mask folder is already there, I assume that all the files are inside
The data folder is already there, I assume that all the files are inside


# Begin with generating the connectomes

In [13]:
# File name patterns for the templates, the data files and the outputs
mask_temp = 'template_cambridge_basc_multiscale_sym_scale{:03}.nii.gz'
data_temp = 'fmri_*_session1_run1.nii.gz'
out_temp = 'connectomes_cobre_{}.{}'

# The cambridge template comes in these scales
scales = [7,12,20,36,64,122,197,325,444]

# Make sure the 'connectomes' folder exists below the work path
out_dir = os.path.join(work_path, 'connectomes')
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

In [6]:
# Get all the files in the directory
files = glob.glob(os.path.join(data_dir, data_temp))
# Maps every subject name to the path of its data file
subs = dict()
for f in files:
    fname = os.path.basename(f)
    # Get the subject name: the letters and digits right after 'fmri_'
    match = re.search(r'(?<=fmri_)[a-zA-Z0-9]+', fname)
    if match is None:
        raise Exception('Could not find a subject name in {}'.format(f))
    sub = match.group()
    if sub in subs:
        # Every subject must show up only once, stop if that is not true
        message = 'There are 2 of {}, the second one was {}'.format(sub, f)
        raise Exception(message)
    subs[sub] = f

In [7]:
num_subs = len(subs)
# For every scale keep the template volume and its non-zero labels
scale_dict = {}
for scale in scales:
    template_file = os.path.join(mask_dir, mask_temp.format(scale))
    labels = nib.load(template_file).get_data()
    # Zero is left out of the list of region labels
    roi_labels = np.unique(labels[labels != 0])
    scale_dict['scale_{}'.format(scale)] = (labels, roi_labels)

In [8]:
# One (regions x regions x subjects) stack per scale, filled subject by subject
cobre_connectomes = {}
num_subs = len(subs)
avg_time = np.array([])
report = '\r #{} took {:.3f} seconds, {:.1f}% done, {:.2f} more seconds to go'
for s_id, sub in enumerate(subs):
    done = s_id + 1
    tic = time.time()

    data = nib.load(subs[sub]).get_data()
    # The last axis of the volume is used as the time axis
    t_points = data.shape[3]

    for scale in scales:
        scale_name = 'scale_{}'.format(scale)
        mask, rois = scale_dict[scale_name]
        num_rois = len(rois)

        # Average the signal over all voxels that carry the region label
        data_rois = np.zeros((num_rois, t_points))
        for ind, roi in enumerate(rois):
            data_rois[ind, :] = np.mean(data[mask == roi, :], axis=0)

        # Allocate the stack for this scale when it is seen the first time
        if scale_name not in cobre_connectomes:
            cobre_connectomes[scale_name] = np.zeros((num_rois, num_rois, num_subs))
        # Correlate the region averages with each other
        cobre_connectomes[scale_name][..., s_id] = np.corrcoef(data_rois)

    took = time.time() - tic
    avg_time = np.append(avg_time, took)
    # Estimate the remaining time from the average time per subject so far
    rem_t = (num_subs - done) * np.average(avg_time)
    p_c = float(done) / num_subs * 100
    sys.stdout.write(report.format(done, took, p_c, rem_t))
    sys.stdout.flush()

 #146 took 1.444 seconds, 100.0% done, 0.00 more seconds to go

# Save the output in a big file for convenience

In [9]:
# Keep the subject names next to the connectomes in the same dictionary
subject_names = subs.keys()
cobre_connectomes['subjects'] = subject_names

In [10]:
# Write the whole dictionary into one .mat file in the work path
all_mat_f = os.path.join(work_path, out_temp.format('all','mat'))
si.savemat(all_mat_f, cobre_connectomes)

In [11]:
# Write the whole dictionary into one gzip compressed pickle in the work path
all_gzp_f = os.path.join(work_path, out_temp.format('all','gz'))
with gzip.open(all_gzp_f, 'wb') as f:
    cPickle.dump(cobre_connectomes, f, protocol=2)

# Save the output in separate files for figshare

In [15]:
# One output folder per file format, created when missing
mat_p = os.path.join(work_path, 'matlab')
gzp_p = os.path.join(work_path, 'zippickle')
npy_p = os.path.join(work_path, 'numpy')
for format_dir in (mat_p, gzp_p, npy_p):
    if not os.path.isdir(format_dir):
        os.makedirs(format_dir)

for key, tmp in cobre_connectomes.items():
    if key != 'subjects':
        # Store the array of this key once in every format
        mat_f = os.path.join(mat_p, out_temp.format(key,'mat'))
        gzp_f = os.path.join(gzp_p, out_temp.format(key,'gz'))
        npy_f = os.path.join(npy_p, out_temp.format(key,'npy'))
        si.savemat(mat_f, {'{}'.format(key):tmp})
        with gzip.open(gzp_f, 'wb') as f:
            cPickle.dump(tmp, f, protocol=2)
        np.save(npy_f, tmp)
    else:
        # The subjects are not an array, write them out as a text file
        # with one name per line
        sub_f = os.path.join(work_path, 'subjects.txt')
        with open(sub_f, 'wb') as f:
            f.writelines('{}\n'.format(item) for item in tmp)