# RO1 Quick analysis

In [127]:
import os
import pickle as pkl
from collections import OrderedDict as od
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.multivariate.manova import MANOVA
import pandas as pd
import scipy as sp

In [14]:
# Load the different data for each metric.
# Every pickle is unpacked into four objects (dXTrain, dXTest, aYTrain, aYTest),
# which are stored per connectivity metric in the four dictionaries below.
dAllConnTr=od()
dAllConnTe=od()
dAllYTr=od()
dAllYTe=od()

for sConn in ['Correlation', 'PartialCorrelation', 'Covariance', 'Precision', 'LSGC','Tangent']:
    sPath = os.path.join(
        '/project/bioinformatics/DLLab/Cooper/Code/AutismProject/',f'AlternateMetrics/{sConn}AllData.p')
    # NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
    # The context manager closes the file handle as soon as the pickle has been read.
    with open(sPath, 'rb') as fData:
        dXTrain, dXTest, aYTrain, aYTest = pkl.load(fData)
    dAllConnTr[sConn] = dXTrain
    dAllConnTe[sConn] = dXTest
    dAllYTr[sConn] = aYTrain
    dAllYTe[sConn] = aYTest

In [35]:
# Merge the train and test splits of every metric so the group analysis sees all rows.
# Each stored entry is a two-item list: [stacked data, stacked targets], train rows first.
dAllConn = od()
for sKey, dTrainSet in dAllConnTr.items():
    dAllConn[sKey] = dMerged = {}
    for sKey2 in dTrainSet:
        if sKey2 == 'anatomy':
            aData = np.concatenate((dTrainSet[sKey2], dAllConnTe[sKey][sKey2]), axis=0)
            aTarget = np.concatenate((dAllYTr[sKey], dAllYTe[sKey]), axis=0)
            dMerged[sKey2] = [aData, aTarget]
        elif sKey2 == 'connectivity':
            # every sub-entry under 'connectivity' becomes its own entry, keyed by sKey3
            for sKey3, aTrainConn in dTrainSet[sKey2].items():
                # first 36 features are confounders (site location number)
                aData = np.concatenate((aTrainConn[:, 36:],
                                        dAllConnTe[sKey][sKey2][sKey3][:, 36:]), axis=0)
                aTarget = np.concatenate((dAllYTr[sKey], dAllYTe[sKey]), axis=0)
                dMerged[sKey3] = [aData, aTarget]
        # keys other than 'anatomy' and 'connectivity' are skipped

In [124]:
# def fMANOVATest(aX,aY):
#     aDep=np.concatenate((aX, aY), axis=0)
#     aIndep=np.concatenate((np.ones(aX.shape[0],1), np.zeros(aY.shape[0],1)),axis=0)
#     MANOVA(endog=aDep, exog=aIndep)
#     return MANOVA.mv_test()

In [126]:
# answer is not converging
# sKey='Correlation'
# sKey2='anatomy'

# aX=dAllConn[sKey][sKey2][0][idASD]
# aY=dAllConn[sKey][sKey2][0][idHC]

# aDep=np.concatenate((aX, aY), axis=0)
# aIndep=np.concatenate((np.ones((aX.shape[0],1)), np.zeros((aY.shape[0],1))), axis=0)

# cTest=MANOVA(endog=aDep, exog=aIndep)
# cTest.mv_test()
# d=pd.DataFrame(np.concatenate((aIndep, aDep), axis=1), columns=[f'ROI{x}' for x in range(aDep.shape[1]+1)])

# sFormula=list(d.columns[1:])
# sFormula=' + '.join(map(str, sFormula))
# sFormula=f'{sFormula} ~ ROI0'
# sFormula

# cOut=MANOVA.from_formula(sFormula, data=d)
# print(cOut.mv_test())

In [162]:
from statsmodels.stats.multitest import multipletests

In [191]:
def fTest(aX, aY, flThresh=0.05):
    """Compare two groups feature by feature with Welch t-tests and FDR correction.

    Every column of ``aX`` is tested against the matching column of ``aY`` with
    an unequal-variance two-sample t-test (``equal_var=False``). The p-values
    are then corrected with ``multipletests(..., method='fdr_bh')``.

    Args:
        aX: 2D array (rows x features) holding the first group.
        aY: 2D array (rows x features) holding the second group. Only its
            first ``aX.shape[1]`` columns are used.
        flThresh: value passed to ``multipletests`` as ``alpha`` and used as
            the cut-off applied to the corrected p-values.

    Returns:
        aP: 1D array of the *uncorrected* p-values, one per feature, with NaN
            entries replaced by 1.
        idxSig: the tuple produced by ``np.where`` with the indices of the
            features whose corrected p-value is strictly below ``flThresh``.
    """
    # Local import: `import scipy as sp` alone does not guarantee that the
    # `sp.stats` submodule has been loaded.
    from scipy import stats

    nFeatures = aX.shape[1]
    # One vectorised call tests all columns at once instead of looping in Python.
    cResult = stats.ttest_ind(aX, aY[:, :nFeatures], axis=0, equal_var=False)
    aP = np.array(cResult.pvalue, dtype=float)
    # An undefined (NaN) p-value is treated as "no evidence of a difference".
    aP[np.isnan(aP)] = 1
    aPCorrected = multipletests(aP, alpha=flThresh, method='fdr_bh')[1]
    idxSig = np.where(aPCorrected < flThresh)
    return aP, idxSig

In [195]:
# Test for significance (threshold 0.01)
# NOTE(review): idASD and idHC are not defined in any visible cell; they must already
# exist in the kernel -- presumably row selectors for the ASD and HC groups. TODO confirm
# and define them explicitly so the notebook survives Restart & Run All.
dAllTestStats=od()
for sKey in dAllConn.keys():
    dAllTestStats[sKey] = {}
    for sKey2 in dAllConn[sKey].keys():
        aASD=dAllConn[sKey][sKey2][0][idASD]
        aHC=dAllConn[sKey][sKey2][0][idHC]
        aP, idxSig=fTest(aASD,aHC,flThresh=0.01)
        print(f'{idxSig[0].shape[0]}/{aP.shape[0]} significant differences between ASD and HC for {sKey2} features using {sKey}')
        # Store the values computed for this feature set: the p-values returned by
        # fTest and the indices it flagged as significant.
        dAllTestStats[sKey][sKey2] = {'aP': aP, 'idxSig': idxSig}

3/243 significant differences between ASD and HC for anatomy features using Correlation
108/2080 significant differences between ASD and HC for basc064 features using Correlation
357/7503 significant differences between ASD and HC for basc122 features using Correlation
739/19503 significant differences between ASD and HC for basc197 features using Correlation
735/31125 significant differences between ASD and HC for craddock_scorr_mean features using Correlation
12/1176 significant differences between ASD and HC for harvard_oxford_cort_prob_2mm features using Correlation
25/780 significant differences between ASD and HC for msdl features using Correlation
410/34980 significant differences between ASD and HC for power_2011 features using Correlation
3/243 significant differences between ASD and HC for anatomy features using PartialCorrelation
0/2080 significant differences between ASD and HC for basc064 features using PartialCorrelation
1/7503 significant differences between ASD and HC f

In [194]:
# Test for significance (threshold 0.05)
# NOTE(review): idASD and idHC are not defined in any visible cell; they must already
# exist in the kernel -- presumably row selectors for the ASD and HC groups. TODO confirm
# and define them explicitly so the notebook survives Restart & Run All.
# NOTE(review): this rebinds dAllTestStats, discarding results stored by any earlier run.
dAllTestStats=od()
for sKey in dAllConn.keys():
    dAllTestStats[sKey] = {}
    for sKey2 in dAllConn[sKey].keys():
        aASD=dAllConn[sKey][sKey2][0][idASD]
        aHC=dAllConn[sKey][sKey2][0][idHC]
        aP, idxSig=fTest(aASD,aHC,flThresh=0.05)
        print(f'{idxSig[0].shape[0]}/{aP.shape[0]} significant differences between ASD and HC for {sKey2} features using {sKey}')
        # Store the values computed for this feature set: the p-values returned by
        # fTest and the indices it flagged as significant.
        dAllTestStats[sKey][sKey2] = {'aP': aP, 'idxSig': idxSig}

6/243 significant differences between ASD and HC for anatomy features using Correlation
272/2080 significant differences between ASD and HC for basc064 features using Correlation
889/7503 significant differences between ASD and HC for basc122 features using Correlation
2105/19503 significant differences between ASD and HC for basc197 features using Correlation
2256/31125 significant differences between ASD and HC for craddock_scorr_mean features using Correlation
39/1176 significant differences between ASD and HC for harvard_oxford_cort_prob_2mm features using Correlation
84/780 significant differences between ASD and HC for msdl features using Correlation
1461/34980 significant differences between ASD and HC for power_2011 features using Correlation
6/243 significant differences between ASD and HC for anatomy features using PartialCorrelation
0/2080 significant differences between ASD and HC for basc064 features using PartialCorrelation
2/7503 significant differences between ASD and H