In [1]:
# Imports
import os
import glob
import numpy as np
import pandas as pd
import nibabel as nib
import brainbox as bb
import nilearn as nil
import statsmodels.api as sm
from scipy import stats as st
from matplotlib import gridspec
from scipy import cluster as scl
from nilearn import plotting as nlp
from matplotlib import pyplot as plt
from sklearn import linear_model as slin
from statsmodels.sandbox import stats as sts
from matplotlib.colors import LinearSegmentedColormap
from statsmodels.sandbox.stats import multicomp as smi

In [2]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

# Paths

In [3]:
# Input locations used by the rest of the notebook
# Mask image; voxels with a non-zero value are treated as inside the mask
mask_path = '/data1/abide/Mask/mask_data_specific.nii.gz'
# Phenotype table (CSV)
pheno_path = '/data1/abide/Pheno/unconstrained_2box_308_sample.csv'
# Sample label that is substituted into the serialized netstack / correlation file names
name = 'n308_sample'

In [4]:
# Load the mask image and turn it into a boolean array.
# `m_img` is kept because its affine and header are reused when new images are built.
m_img = nib.load(mask_path)
# np.asanyarray(img.dataobj) is the documented drop-in replacement for the
# deprecated (and, in nibabel >= 5, removed) img.get_data()
mask_data = np.asanyarray(m_img.dataobj)
# Every non-zero voxel counts as inside the mask
mask = mask_data != 0

In [5]:
# Read the phenotype table into a DataFrame; its rows are later used to
# select entries along the subject axis of the subtype weights
pheno = pd.read_csv(filepath_or_buffer=pheno_path)

In [6]:
# Display the names of the phenotype columns that are available for the models below
pheno.columns

Index([u'SITE_ID', u'SUB_ID', u'DX_GROUP', u'DSM_IV_TR', u'AGE_AT_SCAN',
       u'SEX', u'HANDEDNESS_CATEGORY', u'HANDEDNESS_SCORES', u'FIQ', u'VIQ',
       u'PIQ', u'FIQ_TEST_TYPE', u'VIQ_TEST_TYPE', u'PIQ_TEST_TYPE',
       u'ADI_R_SOCIAL_TOTAL_A', u'ADI_R_VERBAL_TOTAL_BV', u'ADI_RRB_TOTAL_C',
       u'ADI_R_ONSET_TOTAL_D', u'ADI_R_RSRCH_RELIABLE', u'ADOS_MODULE',
       u'ADOS_TOTAL', u'ADOS_COMM', u'ADOS_SOCIAL', u'ADOS_STEREO_BEHAV',
       u'ADOS_RSRCH_RELIABLE', u'ADOS_GOTHAM_SOCAFFECT', u'ADOS_GOTHAM_RRB',
       u'ADOS_GOTHAM_TOTAL', u'ADOS_GOTHAM_SEVERITY', u'SRS_VERSION',
       u'SRS_RAW_TOTAL', u'SRS_AWARENESS', u'SRS_COGNITION',
       u'SRS_COMMUNICATION', u'SRS_MOTIVATION', u'SRS_MANNERISMS',
       u'SCQ_TOTAL', u'AQ_TOTAL', u'COMORBIDITY', u'CURRENT_MED_STATUS',
       u'MEDICATION_NAME', u'OFF_STIMULANTS_AT_SCAN',
       u'VINELAND_RECEPTIVE_V_SCALED', u'VINELAND_EXPRESSIVE_V_SCALED',
       u'VINELAND_WRITTEN_V_SCALED', u'VINELAND_COMMUNICATION_STANDARD',
       u'V

In [7]:
# Add a coded variable for the ratio of verbal IQ (VIQ) to performance IQ (PIQ).
# The GLM cell treats -9999 as the missing-value code and drops those rows via
# `replace(-9999, np.nan)` + `notnull`. Dividing the raw columns would turn a
# missing score into a bogus finite ratio (e.g. -9999 / -9999 == 1.0) that this
# filter can no longer detect, so the sentinel is mapped to NaN *before* dividing.
iq_scores = pheno[['VIQ', 'PIQ']].replace(-9999, np.nan)
pheno['VerbRatio'] = iq_scores['VIQ'] / iq_scores['PIQ']

In [8]:
# Build the custom 'hotcold' colormap.
# Each channel is described by anchors of the form
# (position, value just below the position, value just above the position);
# values between anchors are interpolated linearly. Reading the three channels
# together, the map runs cyan -> blue -> black -> red -> yellow over [0, 1].
cdict = dict(
    red=((0.0, 0.0, 0.0),
         (0.5, 0.0, 0.0),
         (0.75, 1.0, 1.0),
         (1.0, 1.0, 1.0)),
    green=((0.0, 1.0, 1.0),
           (0.25, 0.0, 0.0),
           (0.5, 0.0, 0.0),
           (0.75, 0.0, 0.0),
           (1.0, 1.0, 1.0)),
    blue=((0.0, 1.0, 1.0),
          (0.25, 1.0, 1.0),
          (0.5, 0.0, 0.0),
          (1.0, 0.0, 0.0)),
)
hotcold = LinearSegmentedColormap(name='hotcold', segmentdata=cdict)

# Scale iteration

In [9]:
# Subtype / phenotype association analysis, repeated for several network scales.
#
# For every scale:
#   1. load the network prior, the serialized subject maps (`netstack`) and the
#      per-network correlation matrices (`corr_mat`) from disk,
#   2. for every network, cluster with Ward linkage, cut the tree into
#      `subtypes` groups, average the maps of each group into a subtype map and
#      correlate every subject map with every subtype map (the "weights"),
#   3. for every covariate in `cov_list`, regress the weights on the covariate
#      plus site, SEX, AGE_AT_SCAN and FD_scrubbed, and print the
#      (network, subtype) pairs whose covariate p-value survives FDR correction.
#
# The subtype computation used to be pasted twice in a row (and the weight loop
# twice inside each copy), so all clustering and correlations ran redundantly;
# they now run exactly once. The module-level names left behind by the original
# cell are still assigned because later cells may read them.

# Number of subtypes per network (loop-invariant)
subtypes = 5

# Covariates that are tested against the subtype weights (loop-invariant)
cov_list = ['ADOS_TOTAL', 'VIQ', 'DX_GROUP',
            'VINELAND_ABC_STANDARD', 'SRS_RAW_TOTAL',
            'ADOS_SOCIAL', 'ADOS_COMM', 'ADOS_STEREO_BEHAV', 'VerbRatio',
            'ADI_R_VERBAL_TOTAL_BV', 'ADI_R_SOCIAL_TOTAL_A', 'ADI_RRB_TOTAL_C']

scales = [7,12,20]
for scale in scales:
    print('\n\nSCALE {}'.format(scale))
    # Input files for this scale
    prior_path = '/data1/cambridge/template/template_cambridge_basc_multiscale_sym_scale{:03d}.nii.gz'.format(scale)
    netstack_path = '/data1/subtypes/serial_preps/netstack_demeaned_{}_scale_{:03d}.npy'.format(name, scale)
    # Not loaded in this cell; the path is kept for anything downstream
    netraw_path = '/data1/subtypes/serial_preps/netstack_raw_{}_scale_{:03d}.npy'.format(name, scale)
    corrmat_path = '/data1/subtypes/serial_preps/correlation_matrix_{}_scale_{:03d}.npy'.format(name, scale)

    # Load the prior once; `p_img` is a legacy alias for the same image
    prior = nib.load(prior_path)
    p_img = prior
    # np.asanyarray(img.dataobj) replaces the deprecated/removed img.get_data()
    prior_data = np.asanyarray(prior.dataobj)

    # Turn the label image into a 4D image with one volume per network:
    # volume sc_id holds the value sc_id + 1 where the prior has that label, 0 elsewhere
    prior_temp = np.zeros(prior_data.shape + (scale,))
    for sc_id in range(scale):
        prior_temp[..., sc_id] = np.where(prior_data == sc_id + 1, sc_id + 1, 0)
    # .affine / .header replace the deprecated get_affine() / get_header()
    prior_img = nib.Nifti1Image(prior_temp, affine=m_img.affine, header=m_img.header)

    # Load the serialized netstack and the correlation matrices
    netstack = np.load(netstack_path)
    corr_mat = np.load(corrmat_path)

    # Placeholder for the grand average; it is never filled in this cell
    gdavg = np.zeros(mask.shape + (scale,))

    # netstack is indexed as [network, voxel, subject] throughout this cell.
    # The network count is taken from the data (as the original did), but under
    # its own name instead of overwriting the loop variable `scale`.
    n_net = netstack.shape[0]
    n_vox = netstack.shape[1]
    n_sub = netstack.shape[2]

    link_store = np.zeros((n_sub-1, 4, n_net))
    part_store = np.zeros((n_net, n_sub))
    sbt_store = np.zeros((n_net, subtypes, n_vox))
    weight_store = np.zeros((n_net, subtypes, n_sub))

    # Iterate through the networks
    for net_id in range(n_net):
        # Compute linkage with Ward's criterion
        link_mat = scl.hierarchy.linkage(corr_mat[net_id, ...], method='ward')
        link_store[..., net_id] = link_mat
        # Partition the linkage to get a given number of subtypes
        part_sub = scl.hierarchy.fcluster(link_mat, subtypes, criterion='maxclust')
        part_store[net_id, :] = part_sub

        sub_stack = np.zeros((n_vox, subtypes))
        for s_id in range(subtypes):
            # Mixing an integer and a boolean-mask index around a slice makes
            # numpy put the selected-subject axis first, so the selection has
            # shape (n_selected, n_vox) and the mean over axis 0 is the average
            # map of the subjects assigned to this subtype.
            sbt = np.mean(netstack[net_id, :, part_sub==s_id+1], 0)
            sub_stack[:, s_id] = sbt
            sbt_store[net_id, s_id, :] = sbt
            # Weight = correlation between the subtype map and each subject map
            weight_store[net_id, s_id, :] = np.array([np.corrcoef(sbt, netstack[net_id, :, x])[0,1] for x in range(n_sub)])

    for cov in cov_list:
        # Keep only the rows where the covariate is present (-9999 codes missing)
        cov_index = pd.notnull(pheno[cov].replace(-9999, np.nan))
        cov_pheno = pheno[cov_index]
        # Generate the model matrix
        factors = [cov, 'SEX', 'AGE_AT_SCAN', 'FD_scrubbed']
        # Make dummy variables for the site factor. Cast to float so the design
        # matrix stays numeric: recent pandas returns bool dummies, which would
        # produce an object-dtype matrix that statsmodels refuses.
        site_factor = pd.get_dummies(cov_pheno['SITE_ID']).astype(float)
        # Turn the first site into the intercept
        site_factor = site_factor.rename(columns={site_factor.columns[0]: 'INTERCEPT'})
        site_factor['INTERCEPT'] = 1.0
        # Get the other variables. `.loc` replaces the removed `.ix`; the copy
        # makes the assignments below act on an independent frame rather than
        # on a slice of `pheno`.
        other_factors = cov_pheno.loc[:, factors].copy()
        # Demean age
        other_factors['AGE_AT_SCAN'] = other_factors['AGE_AT_SCAN']-np.mean(other_factors['AGE_AT_SCAN'].values)
        # Demean the covariate
        other_factors[cov] = other_factors[cov]-np.mean(other_factors[cov].values)
        # Put them back together
        glm_pheno = pd.concat([site_factor, other_factors], axis=1)
        # Restrict the weights to the retained rows (last axis = subjects)
        cov_weight = weight_store[..., cov_index.values]
        res_store = list()
        pval_store = np.zeros((n_net, subtypes))
        for net_id in range(n_net):
            res_list = list()
            # Loop through the subtypes
            for s_id in range(subtypes):
                model = sm.OLS(cov_weight[net_id, s_id, :], glm_pheno)
                results = model.fit()
                # Save the p-value of the covariate of interest
                pval_store[net_id, s_id] = results.pvalues[cov]
                res_list.append(results)
            res_store.append(res_list)
        # Now look at the mask of p-values passing FDR correction
        # (Benjamini-Hochberg across all network x subtype tests of this covariate)
        pval_vec = pval_store.flatten()
        pcorr_vec = smi.multipletests(pval_vec, alpha=0.05, method='fdr_bh')
        # Find the hits; pcorr_vec[0] is the boolean reject mask
        if np.sum(pcorr_vec[0]) > 0:
            pcorr_store = np.reshape(pcorr_vec[0], pval_store.shape)
            # Each hit is a [network index, subtype index] pair
            hits = np.argwhere(pcorr_store!=0)
            print('\n    {} findings for {} ({})'.format(np.sum(pcorr_vec[0]), cov, list(hits)))
        else:
            print('\n    {} findings for {}'.format(np.sum(pcorr_vec[0]), cov))



SCALE 7

    4 findings for ADOS_TOTAL ([array([2, 1]), array([2, 2]), array([2, 3]), array([5, 2])])

    4 findings for VIQ ([array([5, 2]), array([5, 3]), array([6, 2]), array([6, 4])])

    9 findings for DX_GROUP ([array([2, 0]), array([2, 1]), array([2, 2]), array([2, 3]), array([3, 1]), array([3, 2]), array([4, 0]), array([4, 2]), array([6, 4])])

    0 findings for VINELAND_ABC_STANDARD

    0 findings for SRS_RAW_TOTAL

    1 findings for ADOS_SOCIAL ([array([2, 1])])

    4 findings for ADOS_COMM ([array([2, 0]), array([2, 1]), array([2, 3]), array([5, 1])])

    0 findings for ADOS_STEREO_BEHAV

    0 findings for VerbRatio

    0 findings for ADI_R_VERBAL_TOTAL_BV

    0 findings for ADI_R_SOCIAL_TOTAL_A

    0 findings for ADI_RRB_TOTAL_C


SCALE 12

    7 findings for ADOS_TOTAL ([array([1, 0]), array([1, 4]), array([6, 1]), array([6, 2]), array([6, 4]), array([7, 2]), array([7, 3])])

    7 findings for VIQ ([array([1, 0]), array([1, 4]), array([6, 1]), array([6, 4]), 