In [1]:
import numpy as np
import pandas as pd
from os.path import join
from glob import glob
from sklearn.impute import KNNImputer

In [2]:
def jili_sidak_mc(data, alpha):
    import math
    import numpy as np

    mc_corrmat = data.corr()
    mc_corrmat.fillna(0, inplace=True)
    eigvals, eigvecs = np.linalg.eig(mc_corrmat)

    M_eff = 0
    for eigval in eigvals:
        if abs(eigval) >= 0:
            if abs(eigval) >= 1:
                M_eff += 1
            else:
                M_eff += abs(eigval) - math.floor(abs(eigval))
        else:
            M_eff += 0
    print('Number of effective comparisons: {0}'.format(M_eff))

    #and now applying M_eff to the Sidak procedure
    sidak_p = 1 - (1 - alpha)**(1/M_eff)
    if sidak_p < 0.00001:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:2e} after corrections'.format(sidak_p))
    else:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:.6f} after corrections'.format(sidak_p))
    return sidak_p, M_eff

In [3]:
def grab_corr(subjects, nodes, task, condition, session, atlas):
    errors = pd.Series(index=subjects, dtype=str)
    edges = pd.Series(index=subjects, name='edge', dtype=np.float64)
    node1 = nodes[0]
    node2 = nodes[1]
    
    for subject in subjects:
        try:
            if condition != None:
                corrmat = np.genfromtxt(join(data_dir, 
                                             'output/corrmats', 
                                             '{0}-session-{1}_{2}-{3}_{4}-corrmat.csv'.format(subject, 
                                                                                                        session, 
                                                                                                        task, 
                                                                                                        condition, 
                                                                                                        atlas)),
                                        delimiter=' ')
            else:
                corrmat = np.genfromtxt(join(data_dir, 
                                             'output/corrmats', 
                                             '{0}-session-{1}-{2}_network_corrmat_{3}.csv'.format(subject, 
                                                                                                        session, 
                                                                                                        task, 
                                                                                                        atlas)),
                                        delimiter=',')
            edges[subject] = corrmat[node1][node2]
            #post_retr_conn.at[subject] = np.ravel(corrmat, order='F')
        except Exception as e:
            errors[subject] = e
    return edges, errors

In [4]:
subjects = [101, 102, 103, 104, 106, 107, 108, 110, 212, 213,
            214, 215, 216, 217, 218, 219, 320, 321, 322, 323,
            324, 325, 327, 328, 329, 330, 331, 332, 333, 334,
            335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
            345, 346, 347, 348, 349, 350, 451, 452, 453, 455,
            456, 457, 458, 459, 460, 462, 463, 464, 465, 467,
            468, 469, 470, 502, 503, 571, 572, 573, 574, 575,
            577, 578, 579, 580, 581, 582, 584, 585, 586, 587,
            588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
            598, 604, 605, 606, 607, 608, 609, 610, 611, 612,
            613, 614, 615, 616, 617, 618, 619, 620, 621, 622,
            623, 624, 625, 626, 627, 628, 629, 630, 631, 633,
            634]
#subjects = [101, 102, 103]

sink_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output'
data_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data'
roi_dir = '/Users/kbottenh/Dropbox/Data/templates/shen2015/'
fig_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/figures/'

shen = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/shen2015_2mm_268_parcellation.nii.gz'
craddock = '/home/kbott006/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz'
masks = ['shen2015', 'craddock2012']

tasks = ['retr', 'fci']
conditions = ['phys', 'ctrl']
sessions = ['pre', 'post']

In [5]:
subgraphs = glob(join(sink_dir, '*iq*sig_edges.csv'))

In [6]:
#load in all-data df

big_df = pd.read_csv(join(data_dir, 
                          'rescored', 
                          'physics_learning-local_efficiency-BayesianImpute.csv'), 
                     index_col=0, header=0)
drop = big_df.filter(regex=".*lEff.*").columns
big_df.drop(drop, axis=1, inplace=True)

In [7]:
big_df.rename({'post phys retr fd': 'post_phys_retr_fd', 
               'post phys fci fd': 'post_phys_fci_fd',
               'Strt.Level': 'Strt_Level'}, axis=1, inplace=True)

In [8]:
for subgraph in subgraphs:
    regression = subgraph[63:-4]
    print(regression)
    keys = regression.split('-')
    mask = keys[0]
    condition = 'Physics'
    task = keys[1].split('_')[0]
    iq = keys[2].split('_')[0]
    cov = keys[2].split('_')[1]

    conns = pd.read_csv(subgraph, index_col=0, header=0)

    conns.dropna(how='all', axis=0, inplace=True)
    conns.dropna(how='all', axis=1, inplace=True)
    conns.fillna(0, inplace=True)
    
    edges = []
    for column in conns.columns:
        ind = conns[conns[column] != 0].index
        print(ind)
        for i in ind:
            #print(column, i[0])
            edges.append((int(column), i))

    for edge in edges:
        print(regression, edge)
        edges, error = grab_corr(big_df.index, edge, task, 'Physics', '1', mask)
        edges.name = '{0} {1}'.format(mask, edge)
        big_df = pd.concat([edges,big_df], axis=1)
jili_sidak_mc(big_df.dropna(how='any'), 0.05)

craddock2012-fci_phys-FSIQ2_iqXSexXClass-sig_edges
Int64Index([234], dtype='int64')
Int64Index([52], dtype='int64')
craddock2012-fci_phys-FSIQ2_iqXSexXClass-sig_edges (52, 234)
craddock2012-fci_phys-FSIQ2_iqXSexXClass-sig_edges (234, 52)
craddock2012-fci_phys-FSIQ2_iqXClass-sig_edges
Int64Index([234], dtype='int64')
Int64Index([52], dtype='int64')
craddock2012-fci_phys-FSIQ2_iqXClass-sig_edges (52, 234)
craddock2012-fci_phys-FSIQ2_iqXClass-sig_edges (234, 52)
shen2015-fci_phys-FSIQ2_iqXSexXClass-sig_edges
Int64Index([147], dtype='int64')
Int64Index([36], dtype='int64')
shen2015-fci_phys-FSIQ2_iqXSexXClass-sig_edges (36, 147)
shen2015-fci_phys-FSIQ2_iqXSexXClass-sig_edges (147, 36)
craddock2012-fci_phys-deltaPRI_iqXSexXClass-sig_edges
Int64Index([168], dtype='int64')
Int64Index([168], dtype='int64')
Int64Index([79, 165], dtype='int64')
craddock2012-fci_phys-deltaPRI_iqXSexXClass-sig_edges (79, 168)
craddock2012-fci_phys-deltaPRI_iqXSexXClass-sig_edges (165, 168)
craddock2012-fci_phys-de

(0.0026430219055756377, 19.38140570467343)

In [9]:
big_df.columns

Index(['craddock2012 (199, 18)', 'craddock2012 (18, 199)',
       'craddock2012 (168, 165)', 'craddock2012 (168, 79)',
       'craddock2012 (165, 168)', 'craddock2012 (79, 168)',
       'shen2015 (147, 36)', 'shen2015 (36, 147)', 'craddock2012 (234, 52)',
       'craddock2012 (52, 234)', 'craddock2012 (234, 52)',
       'craddock2012 (52, 234)', 'Age', 'Strt_Level', 'RetrPhyAcc1',
       'RetrPhyAcc2', 'FCIPhyAcc1', 'FCIPhyAcc2', 'VCI1', 'PRI1', 'WMI1',
       'PSI1', 'FSIQ1', 'VCI2', 'PRI2', 'WMI2', 'PSI2', 'FSIQ2', 'Phy48Grade',
       'F', 'Mod', 'StrtLvl', 'post_phys_retr_fd', 'pre phys retr fd',
       'post gen retr fd', 'pre gen retr fd', 'post_phys_fci_fd',
       'pre phys fci fd', 'post ctrl fci fd', 'pre ctrl fci fd', 'Sex',
       'Class.Type'],
      dtype='object')

In [11]:
brain_impute = KNNImputer(n_neighbors=5, weights='distance')
imp_mat = brain_impute.fit_transform(big_df.drop(['Sex', 'Class.Type'], axis=1))
imp_df = pd.DataFrame(data=imp_mat,
                      columns=big_df.drop(['Sex', 'Class.Type'], axis=1).columns,
                      index=big_df.index)

In [12]:
imp_df.to_csv(join(sink_dir, 'acc+conn~iq-mediation_edges-KNN.csv'), sep='\t')

In [None]:
break

In [None]:
for task in tasks:
    for mask in masks:
        print(mask)
        edge_df = pd.read_csv(join(sink_dir, '{0}-{1}_whole_brain-permuted_ols.csv'.format(task,mask)),
                              index_col=[0,1,2], header=0)
        edge_df.index.set_names(['wais', 'task', 'regressor'], inplace=True)
        for i in edge_df.index:
            edges = [pair for pair in edge_df.loc[i]['edges'].strip('[]').split('), (')]
            tuples = []
            if len(edges) > 1:
                for edge in edges:
                    edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
                    tuples.append(edge_tup)
                np.savetxt(join(sink_dir, '{0}_{1}_{2}.tsv'.format(mask, task, i)), tuples, delimiter='\t')

In [None]:
iq_edge = edge_df.loc['deltaPRI', 'fci', 'iq']['edges']
iq_tup = (int(iq_edge.strip('()').split(', ')[0]), int(iq_edge.strip('()').split(', ')[1]))
tuples.append(edge_tup)

In [None]:
new_df = pd.Series(index=edge_df.index)
iq_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iq']['edges'].strip('[]').split('), (')]
iq_tuples = []
if len(iq_edges) > 1:
    for edge in iq_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iq_tuples.append(edge_tup)
iqs_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXsex']['edges'].strip('[]').split('), (')]
iqs_tuples = []
if len(iqs_edges) > 1:
    for edge in iqs_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqs_tuples.append(edge_tup)
iqc_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXclass']['edges'].strip('[]').split('), (')]
iqc_tuples = []
if len(iqc_edges) > 1:
    for edge in iqc_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqc_tuples.append(edge_tup)
iqsc_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXsexXclass']['edges'].strip('[]').split('), (')]
iqsc_tuples = []
if len(iqsc_edges) > 1:
    for edge in iqsc_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqsc_tuples.append(edge_tup)

In [None]:
set(iqs_tuples) & set(iqc_tuples) & set(iqsc_tuples) & set(iq_tuples)

In [None]:
join(sink_dir, 'mediation_edges.csv')

In [None]:
big_df.columns.str.endswith('.2')

In [None]:
renamed_df = pd.read_csv(join(sink_dir, 'mediation_edges.csv'), sep='\t')
for i in np.arange(0,5):
    drops = renamed_df.columns.str.endswith('.{0}'.format(i))
    renamed_df.drop(renamed_df.columns[drops], axis=1, inplace=True)

In [None]:
renamed_df.to_csv(join(sink_dir, 'mediation_edges.csv'), sep='\t')