In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation
from os.path import join

In [2]:
def jili_sidak_mc(data, alpha):
    import math
    import numpy as np

    mc_corrmat = data.corr()
    mc_corrmat.fillna(0, inplace=True)
    eigvals, eigvecs = np.linalg.eig(mc_corrmat)

    M_eff = 0
    for eigval in eigvals:
        if abs(eigval) >= 0:
            if abs(eigval) >= 1:
                M_eff += 1
            else:
                M_eff += abs(eigval) - math.floor(abs(eigval))
        else:
            M_eff += 0
    print('Number of effective comparisons: {0}'.format(M_eff))

    #and now applying M_eff to the Sidak procedure
    sidak_p = 1 - (1 - alpha)**(1/M_eff)
    if sidak_p < 0.00001:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:2e} after corrections'.format(sidak_p))
    else:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:.6f} after corrections'.format(sidak_p))
    return sidak_p, M_eff

In [3]:
len([(99,100)])

1

In [4]:
def grab_corr(subjects, nodes, task, condition, session, atlas):
    if len(nodes) > 1:
        errors = pd.DataFrame(index=subjects, columns=nodes)
        edges = pd.DataFrame(index=subjects)
        for subject in subjects:
            try:
                if condition != None:
                    corrmat = np.genfromtxt(join(sink_dir, '{0}-session-{1}_{2}-{3}_{4}-corrmat.csv'.format(subject, 
                                                                                                            session, 
                                                                                                            task, 
                                                                                                            condition, 
                                                                                                            atlas)),
                                            delimiter=' ')
                else:
                    corrmat = np.genfromtxt(join(sink_dir, '{0}-session-{1}-{2}_network_corrmat_{3}.csv'.format(subject, 
                                                                                                            session, 
                                                                                                            task, 
                                                                                                            atlas)),
                                            delimiter=',')
                for pair in nodes:
                    node1 = pair[0] - 1
                    node2 = pair[1] - 1
                    edges.at[subject,'{0}_{1}_{2}.{3}_edge'.format(task, mask, pair[0], pair[1])] = corrmat[node1][node2]
                #post_retr_conn.at[subject] = np.ravel(corrmat, order='F')
            except Exception as e:
                print(e)
        
    else:
        errors = pd.Series(index=subjects)
        edges = pd.Series(index=subjects)
        node1 = nodes[0] - 1
        node2 = nodes[1] - 1
        for subject in subjects:
            try:
                if condition != None:
                    corrmat = np.genfromtxt(join(sink_dir, '{0}-session-{1}_{2}-{3}_{4}-corrmat.csv'.format(subject, 
                                                                                                            session, 
                                                                                                            task, 
                                                                                                            condition, 
                                                                                                            atlas)),
                                            delimiter=' ')
                else:
                    corrmat = np.genfromtxt(join(sink_dir, '{0}-session-{1}-{2}_network_corrmat_{3}.csv'.format(subject, 
                                                                                                            session, 
                                                                                                            task, 
                                                                                                            atlas)),
                                            delimiter=',')
                edges[subject] = corrmat[node1][node2]
                #post_retr_conn.at[subject] = np.ravel(corrmat, order='F')
            except Exception as e:
                print(e)
    return edges


In [5]:
subjects = [101, 102, 103, 104, 106, 107, 108, 110, 212, 213,
            214, 215, 216, 217, 218, 219, 320, 321, 322, 323,
            324, 325, 327, 328, 329, 330, 331, 332, 333, 334,
            335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
            345, 346, 347, 348, 349, 350, 451, 452, 453, 455,
            456, 457, 458, 459, 460, 462, 463, 464, 465, 467,
            468, 469, 470, 502, 503, 571, 572, 573, 574, 575,
            577, 578, 579, 580, 581, 582, 584, 585, 586, 587,
            588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
            598, 604, 605, 606, 607, 608, 609, 610, 611, 612,
            613, 614, 615, 616, 617, 618, 619, 620, 621, 622,
            623, 624, 625, 626, 627, 628, 629, 630, 631, 633,
            634]
#subjects = [101, 102, 103]

sink_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output'
data_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data'
roi_dir = '/Users/kbottenh/Dropbox/Data/templates/shen2015/'
fig_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/figures/'

shen = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/shen2015_2mm_268_parcellation.nii.gz'
craddock = '/home/kbott006/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz'
masks = ['shen2015', 'craddock2012']

tasks = ['retr', 'fci']
conditions = ['phys', 'ctrl']
sessions = ['pre', 'post']

df = pd.read_csv(join(data_dir, 'physics-learning-tasks_graphtheory_shen+craddock_nodal.csv'), index_col=0, header=0)
df.rename({'Unnamed: 1': 'session', 'Unnamed: 2': 'task', 'Unnamed: 3': 'condition'}, axis=1, inplace=True)
null_df = pd.read_csv(join(sink_dir, 'local_efficiency', 'task_eff_dist.csv'), 
                      index_col=[0,1,2,3], header=0)

for i in np.arange(0,268)[::-1] :
    df.rename({'lEff{0}'.format(i): 'lEff{0}'.format(i+1)}, axis=1, inplace=True)
j = list(set(df.columns) - set(['session', 'task', 'condition', 'mask']))
j.sort()
conns = j


In [6]:
#load in all-data df

big_df = pd.read_csv(join(data_dir, 'rescored', 'non-brain-data+fd.csv'), index_col=0, header=0)

In [7]:
big_df.rename({'post phys retr fd': 'post_phys_retr_fd', 
               'post phys fci fd': 'post_phys_fci_fd',
               'Strt.Level': 'Strt_Level'}, axis=1, inplace=True)

In [8]:
for task in tasks:
    for mask in masks:
        print(mask)

        #effs = {'post phys fci': {'conns': fci_post_phys, 'iqs': ['deltaPRI', 'deltaFSIQ']},
        #        'post phys retr': {'conns': retr_post_phys, 'iqs': ['WMI2', 'VCI2']}}
        #iqs = effs['post phys fci']['iqs'] + effs['post phys retr']['iqs']
        #variables = ['iq', 'iqXsex', 'iqXclass', 'iqXsexXclass', 'sexXclass', 'F', 'Mod', 'Age', 'Strt.Level', 'fd']


        # # Regress local efficiency on IQ and all the covariates
        # Permuted OLS tests each `target_var` independently, while regressing out `confounding_vars`, 
        #so to run a multiple regression, we test each variable of interest, separately, and put all other 
        #variables in the regression in with the confounds. This way, we can test interactions <i>with</i> main effects.
        # <br><br>
        # Maximum p-values are saved in `sig` dictionary and for each significant variable, the p- and t-values 
        #for each node are saved in `nodaleff_sig`.
        # <br><br>
        # For each regression, maximum <i>p</i>- and <i>t</i>-values are stored in `params`, along with nodes 
        #whose local efficiency is significantly related to each parameter, are stored <i> by variable</i>.

        edge_df = pd.read_csv(join(sink_dir, '{0}-{1}_whole_brain-permuted_ols.csv'.format(task,mask)),
                              index_col=[0,1,2], header=0)
        edge_df.index.set_names(['wais', 'task', 'regressor'], inplace=True)

        for i in edge_df.index:
            edges = [pair for pair in edge_df.loc[i]['edges'].strip('[]').split('), (')]
            tuples = []
            if len(edges) > 1:
                for edge in edges:
                    edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
                    tuples.append(edge_tup)
                edge_df.at[i,'edges'] = tuples
                if 'iq' in i[2]:
                    weights = grab_corr(subjects, tuples, i[1], 'Physics', 1, mask)

                    big_df = pd.concat([big_df, weights], axis=1)
                    
            else:
                #print('no edges for {0}'.format(i))
                pass
jili_sidak_mc(big_df.dropna(how='any'), 0.05)

shen2015
craddock2012
shen2015
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/213-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/217-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/322-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/329-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/332-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/348-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/452-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/456-session-1_fci-Physics_shen2015-corrmat.csv not found.
/

/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/575-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/579-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/580-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/582-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/590-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/611-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/616-session-1_fci-Physics_shen2015-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/628-session-1_fci-Physics_shen2015-corrmat.csv not found.
craddock2012
/Users/kbottenh/Dro

/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/322-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/329-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/332-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/348-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/452-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/456-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/457-session-1_fci-Physics_craddock2012-corrmat.csv not found.
/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/463-session-1_fci-Physics_craddock2012-corrmat.csv not found.


(0.0009164518731262783, 55.94378535461506)

In [9]:
big_df.to_csv(join(sink_dir, 'mediation_edges.csv'), sep='\t')

In [10]:
big_df

Unnamed: 0,GPA,Age,Handedness,Strt_Level,RetrPhyAcc1,Mean Correct RT Pre,RetrPhyAcc2,Mean Correct RT Post,FCIPhyAcc1,FCIPhyAcc2,...,fci_craddock2012_168.165_edge,fci_craddock2012_242.239_edge,fci_craddock2012_145.124_edge,fci_craddock2012_168.145_edge,fci_craddock2012_168.79_edge,fci_craddock2012_144.79_edge,fci_craddock2012_114.12_edge,fci_craddock2012_114.74_edge,fci_craddock2012_168.165_edge.1,fci_craddock2012_145.124_edge.1
101,3.07,20.471233,1.0,30.0,0.708333,4597.352941,0.750000,4280.277778,0.333333,0.444444,...,0.290882,0.122676,0.245757,0.273221,0.285093,0.185029,0.190638,0.159334,0.290882,0.245757
102,2.50,21.202740,0.8,30.0,0.541667,4241.769231,0.833333,4387.000000,0.222222,0.555556,...,0.259800,0.194451,0.232967,0.368456,0.320099,0.382195,0.605741,0.440693,0.259800,0.232967
103,3.35,18.887671,0.8,30.0,0.750000,4124.000000,0.875000,4388.571429,0.444444,0.777778,...,0.173227,0.496135,0.068915,0.308120,0.151836,0.303297,0.195823,0.345467,0.173227,0.068915
104,3.72,21.342466,1.0,40.0,0.666667,4792.250000,0.708333,4978.705882,0.444444,0.555556,...,0.482081,0.456754,0.204051,0.121034,0.297740,0.366782,-0.011580,0.152848,0.482081,0.204051
105,3.24,20.802740,0.5,20.0,0.707359,4583.059592,0.675318,4231.039582,0.240441,0.772431,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
629,4.00,18.682192,1.0,20.0,0.750000,4413.888889,0.708333,4012.941176,0.888889,0.888889,...,0.381538,0.168746,0.263662,0.303280,0.358878,0.373507,0.393566,0.320036,0.381538,0.263662
630,2.72,20.013699,1.0,20.0,0.666667,4790.750000,0.791667,4637.789474,0.666667,0.666667,...,0.425967,0.344308,0.389612,0.115085,0.150070,0.350042,0.213982,0.221622,0.425967,0.389612
631,3.84,20.854795,1.0,30.0,0.625000,4309.466667,0.625000,4600.200000,0.222222,0.444444,...,0.365738,0.237317,0.318086,0.018855,0.408577,0.415342,0.360849,0.258379,0.365738,0.318086
633,3.18,19.328767,0.8,20.0,0.833333,4453.700000,0.916667,4222.863636,0.777778,0.888889,...,0.446243,0.360159,0.189970,0.176822,0.400208,0.237639,0.225935,0.334694,0.446243,0.189970


In [11]:
for task in tasks:
    for mask in masks:
        print(mask)
        edge_df = pd.read_csv(join(sink_dir, '{0}-{1}_whole_brain-permuted_ols.csv'.format(task,mask)),
                              index_col=[0,1,2], header=0)
        edge_df.index.set_names(['wais', 'task', 'regressor'], inplace=True)
        for i in edge_df.index:
            edges = [pair for pair in edge_df.loc[i]['edges'].strip('[]').split('), (')]
            tuples = []
            if len(edges) > 1:
                for edge in edges:
                    edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
                    tuples.append(edge_tup)
                np.savetxt(join(sink_dir, '{0}_{1}_{2}.tsv'.format(mask, task, i)), tuples, delimiter='\t')

shen2015
craddock2012
shen2015
craddock2012


In [12]:
iq_edge = edge_df.loc['deltaPRI', 'fci', 'iq']['edges']
iq_tup = (int(iq_edge.strip('()').split(', ')[0]), int(iq_edge.strip('()').split(', ')[1]))
tuples.append(edge_tup)

ValueError: invalid literal for int() with base 10: '[(168'

In [None]:
new_df = pd.Series(index=edge_df.index)
iq_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iq']['edges'].strip('[]').split('), (')]
iq_tuples = []
if len(iq_edges) > 1:
    for edge in iq_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iq_tuples.append(edge_tup)
iqs_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXsex']['edges'].strip('[]').split('), (')]
iqs_tuples = []
if len(iqs_edges) > 1:
    for edge in iqs_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqs_tuples.append(edge_tup)
iqc_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXclass']['edges'].strip('[]').split('), (')]
iqc_tuples = []
if len(iqc_edges) > 1:
    for edge in iqc_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqc_tuples.append(edge_tup)
iqsc_edges = [pair for pair in edge_df.loc['deltaPRI', 'fci', 'iqXsexXclass']['edges'].strip('[]').split('), (')]
iqsc_tuples = []
if len(iqsc_edges) > 1:
    for edge in iqsc_edges:
        edge_tup = (int(edge.strip('()').split(', ')[0]), int(edge.strip('()').split(', ')[1]))
        iqsc_tuples.append(edge_tup)

In [None]:
set(iqs_tuples) & set(iqc_tuples) & set(iqsc_tuples) & set(iq_tuples)

In [None]:
join(sink_dir, 'mediation_edges.csv')

In [14]:
big_df.columns.str.endswith('.2')

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [34]:
renamed_df = pd.read_csv(join(sink_dir, 'mediation_edges.csv'), sep='\t')
for i in np.arange(0,5):
    drops = renamed_df.columns.str.endswith('.{0}'.format(i))
    renamed_df.drop(renamed_df.columns[drops], axis=1, inplace=True)

In [36]:
renamed_df.to_csv(join(sink_dir, 'mediation_edges.csv'), sep='\t')