In [1]:
import pandas as pd 
import numpy as np 
import pingouin as pg
import matplotlib.pyplot as plt
import scipy
import scipy.cluster.hierarchy as sch
from scipy.stats import pearsonr
import seaborn as sns

In [2]:
extreme_mh_results = pd.read_csv('../../Results/mahalanabis_bundle_metrics_extreme_pt.csv', index_col=0)
moderate_mh_results = pd.read_csv('../../Results/mahalanabis_bundle_metrics_moderate_pt.csv', index_col=0)

  **kwargs


In [3]:
names = []

for subset in ['set1']:#, 'set2', 'set3']:
    for col in extreme_mh_results.columns:
        if subset in col:
            names.append(col.split('_')[0])
subset_names = [name+'_mh_set1' for name in names]

In [5]:
def get_outcomes(iFile='../../DerivedData/Global.csv' ):
    clinic = pd.read_csv(iFile, sep=';')
    to_keep = ['ParticipantID', 'Cognitive-CompositeScore', 'Communication-CompositeScore', 'Motor-CompositeScore','Cognitive-ScaledScore',
                 'ReceptiveCom-ScaledScore', 'ExpressiveCom-ScaledScore', 'FineMotor-ScaledScore','GrossMotor-ScaledScore']
    clinic= clinic[to_keep]

    clinic.rename( columns ={
                'ParticipantID' : 'subject_id',
                'Cognitive-CompositeScore' : 'Cognitive',
                'Communication-CompositeScore' : 'Language', 
                'Motor-CompositeScore' : 'Motor',
                }, inplace=True)
    
    return clinic

In [7]:
extreme_mh_results =  pd.merge(extreme_mh_results,  get_outcomes(), how='inner', on=['subject_id'])
moderate_mh_results =  pd.merge(moderate_mh_results,  get_outcomes(), how='inner', on=['subject_id'])

In [9]:
df = pd.concat([extreme_mh_results, moderate_mh_results])

In [12]:
subset_names

['M1-Brainstem_mh_set1',
 'S1-Brainstem_mh_set1',
 'Paracentral-Brainstem_mh_set1',
 'M1-Caud_mh_set1',
 'S1-Caud_mh_set1',
 'Paracentral-Caud_mh_set1',
 'M1-Lenti_mh_set1',
 'S1-Lenti_mh_set1',
 'Paracentral-Lenti_mh_set1',
 'M1-Thalfus_mh_set1',
 'S1-Thalfus_mh_set1',
 'Paracentral-Thalfus_mh_set1',
 'M1L-M1R_mh_set1',
 'S1L-S1R_mh_set1',
 'S1-M1_mh_set1']

In [13]:
outcomes = ['Cognitive', 'Language', 'Motor']

corr_with_outcome= pd.DataFrame()
r_with_outcome= pd.DataFrame()


pair_names = [
    'M1-Brainstem', 'S1-Brainstem','Paracentral-Brainstem',
    'M1-Caud', 'S1-Caud','Paracentral-Caud',
    'M1-Lenti', 'S1-Lenti','Paracentral-Lenti', 
    'M1-Thalfus', 'S1-Thalfus','Paracentral-Thalfus', 
    'M1L-M1R', 'S1L-S1R',
    'S1-M1' ]

i=0
for bundle in pair_names:
    
    corr_with_outcome.loc[i, 'bundle'] = bundle
    r_with_outcome.loc[i, 'bundle'] = bundle
    
    for subset in ['set1', 'set2', 'set3']:
        for score in outcomes:
    
            test = pg.corr(df[bundle + '_mh_'+ subset], df[score])
            
            corr_with_outcome.loc[i, score+'_'+subset] = np.round(test['p-val'][0],5)
            r_with_outcome.loc[i, score+'_'+subset] = np.round(test['r'][0],5)
    i = i +1 
    
corr_with_outcome[corr_with_outcome.columns[1:]] = corr_with_outcome[corr_with_outcome.columns[1:]].astype(np.float64)
pre_p = corr_with_outcome[corr_with_outcome.columns[1:]].values.ravel()
reject, pvals_corr = pg.multicomp(pre_p, method='fdr_bh')
pvals_corr = pvals_corr.reshape(np.shape(corr_with_outcome[corr_with_outcome.columns[1:]].values))
corr_with_outcome[corr_with_outcome.columns[1:]] = pvals_corr

In [15]:
corr_with_outcome.to_csv('../../Results/MH_correlation_p_with_composite_scores.csv')
r_with_outcome.to_csv('../../Results/MH_correlation_r_with_composite_scores.csv')

In [17]:
outcomes = ['Cognitive-ScaledScore', 'ReceptiveCom-ScaledScore', 'ExpressiveCom-ScaledScore',
'FineMotor-ScaledScore','GrossMotor-ScaledScore']

corr_with_outcome= pd.DataFrame()
r_with_outcome= pd.DataFrame()
i=0
for bundle in pair_names:
    
    corr_with_outcome.loc[i, 'bundle'] = bundle
    
    for subset in ['set1', 'set2', 'set3']:
        for score in outcomes:
    
            test = pg.corr(df[bundle + '_mh_'+ subset], df[score])
            
            corr_with_outcome.loc[i, score+'_'+subset] = np.round(test['p-val'][0],5)
            r_with_outcome.loc[i, score+'_'+subset] = np.round(test['r'][0],5)
    i = i +1 
    
corr_with_outcome[corr_with_outcome.columns[1:]] = corr_with_outcome[corr_with_outcome.columns[1:]].astype(np.float64)
pre_p = corr_with_outcome[corr_with_outcome.columns[1:]].values.ravel()
reject, pvals_corr = pg.multicomp(pre_p, method='fdr_bh')
pvals_corr = pvals_corr.reshape(np.shape(corr_with_outcome[corr_with_outcome.columns[1:]].values))
corr_with_outcome[corr_with_outcome.columns[1:]] = pvals_corr

In [18]:
corr_with_outcome.to_csv('../../Results/MH_correlation_p_with_scaled_scores.csv')
r_with_outcome.to_csv('../../Results/MH_correlation_r_with_scaled_scores.csv')

In [6]:
def get_clinical_variables(iFile='../../DerivedData/Global.csv'):
    clinic = pd.read_csv(iFile, sep=';')
    
    #to_keep = ['ParticipantID', 'FetalGrowthRestriction', 'ParenteralNutrition>21d', 'Pregnancy-size', 
    #      'Sex', 'Gabirth', 'PMA-MRI']
    
    #to_keep = ['ParticipantID', 'FetalGrowthRestriction', 'Gabirth', 'PMA-MRI']
    to_keep = ['ParticipantID', 'Gabirth', 'PMA-MRI']
    clinic = clinic[to_keep]
    clinic.rename( columns ={
                'ParticipantID' : 'subject_id',
                'ParenteralNutrition>21d' : 'ParenteralNutrition',
                'Pregnancy-size' : 'PregnancySize', 
                'Gabirth' : 'birth_age',
                'PMA-MRI' : 'scan_age'
                }, inplace=True)
    
    ## correct excel commas in floats
    for col in ['birth_age', 'scan_age']:
        clinic[col] = clinic[col].astype(str)
        clinic[col] = clinic[col].apply(lambda x: x.replace(',','.'))
        clinic[col] = clinic[col].astype(np.float16)
        
    ### categorize what needs to be categorized

    #for col in ['ParenteralNutrition','PregnancySize', 'FetalGrowthRestriction', 'Sex']:
    #for col in [ 'FetalGrowthRestriction']:
    #    clinic[col] = clinic[col].astype('category')
    #    clinic[col] = clinic[col].cat.codes
   
    return clinic

extreme_mh_results =  pd.merge(extreme_mh_results,  get_clinical_variables(), how='inner', on=['subject_id'])
moderate_mh_results =  pd.merge(moderate_mh_results,  get_clinical_variables(), how='inner', on=['subject_id'])

In [8]:
pair_names = [
    'M1-Brainstem', 'S1-Brainstem','Paracentral-Brainstem',
    'M1-Caud', 'S1-Caud','Paracentral-Caud',
    'M1-Lenti', 'S1-Lenti','Paracentral-Lenti', 
    'M1-Thalfus', 'S1-Thalfus','Paracentral-Thalfus', 
    'M1L-M1R', 'S1L-S1R',
    'S1-M1' ]

set_names = ['set1', 'set2', 'set3']

In [20]:
ext_res = pd.DataFrame()
i=0
for tract in pair_names: 
    for s in set_names:
        col= tract +'_mh_' + s
        lr = pg.linear_regression(extreme_mh_results[col].values, extreme_mh_results['birth_age'].values)
        #print(lr)
        ext_res.loc[i,'tract'] = tract
        ext_res.loc[i,s+'_T'] = lr['T'].values[0]
        ext_res.loc[i,s+'_p'] = lr['pval'].values[0]
        
    i=i+1
    
p_cols = [col for col in ext_res.columns if 'p' in col]
pre_p = ext_res[p_cols].values.ravel()
_, pvals_corr = pg.multicomp(pre_p, method='fdr_bh')
ext_res[p_cols] = pvals_corr.reshape(np.shape(ext_res[p_cols].values))


In [21]:
ext_res

Unnamed: 0,tract,set1_T,set1_p,set2_T,set2_p,set3_T,set3_p
0,M1-Brainstem,32.423772,7.412961e-25,30.699896,2.0349219999999998e-24,33.110208,6.7263860000000005e-25
1,S1-Brainstem,35.500947,1.270454e-25,35.590245,1.270454e-25,29.038176,8.997754e-24
2,Paracentral-Brainstem,32.834274,6.7263860000000005e-25,32.134591,8.583361e-25,36.21819,1.270454e-25
3,M1-Caud,25.884858,1.923771e-22,25.873192,1.923771e-22,31.061619,1.590076e-24
4,S1-Caud,25.889645,1.923771e-22,27.172394,5.410447e-23,24.591169,7.771677e-22
5,Paracentral-Caud,29.866036,4.04417e-24,30.561602,2.219513e-24,31.714717,9.565762e-25
6,M1-Lenti,30.747503,2.0349219999999998e-24,30.509908,2.229128e-24,31.77182,9.565762e-25
7,S1-Lenti,26.076388,1.721974e-22,26.587974,9.989694e-23,23.277128,3.72638e-21
8,Paracentral-Lenti,28.010874,2.4379630000000003e-23,28.717886,1.2037560000000001e-23,31.980271,8.811007e-25
9,M1-Thalfus,31.539908,1.063247e-24,32.384705,7.412961e-25,34.283175,2.9218240000000003e-25


In [23]:
mod_res = pd.DataFrame()
i=0
for tract in pair_names: 
    for s in set_names:
        col= tract +'_mh_' + s
        lr = pg.linear_regression(moderate_mh_results[col].values, moderate_mh_results['birth_age'].values)
        #print(lr)
        mod_res.loc[i,'tract'] = tract
        mod_res.loc[i,s+'_T'] = lr['T'].values[0]
        mod_res.loc[i,s+'_p'] = lr['pval'].values[0]
        
    i=i+1
    
p_cols = [col for col in mod_res.columns if 'p' in col]
pre_p = mod_res[p_cols].values.ravel()
_, pvals_corr = pg.multicomp(pre_p, method='fdr_bh')
mod_res[p_cols] = pvals_corr.reshape(np.shape(mod_res[p_cols].values))


In [27]:
mod_res


Unnamed: 0,tract,set1_T,set1_p,set2_T,set2_p,set3_T,set3_p
0,M1-Brainstem,75.570053,3.144617e-29,64.897414,2.8442590000000004e-28,64.961877,2.8442590000000004e-28
1,S1-Brainstem,81.596277,8.372977e-30,73.530778,4.032783e-29,56.501578,5.4201790000000006e-27
2,Paracentral-Brainstem,83.548927,8.372977e-30,74.729852,3.424686e-29,70.685004,6.044593e-29
3,M1-Caud,71.985691,5.152908000000001e-29,63.412294,4.612696e-28,73.896637,3.9796860000000004e-29
4,S1-Caud,75.417224,3.144617e-29,70.715105,6.044593e-29,67.946255,1.33068e-28
5,Paracentral-Caud,60.531639,1.1986620000000001e-27,59.305208,1.898672e-27,70.539027,6.044593e-29
6,M1-Lenti,58.431742,2.631381e-27,52.469428,3.083382e-26,66.268404,1.933977e-28
7,S1-Lenti,64.190159,3.5670960000000004e-28,62.077512,6.966426e-28,63.05269,5.112704e-28
8,Paracentral-Lenti,67.79007,1.3389100000000002e-28,61.904907,7.22592e-28,73.236952,4.034296e-29
9,M1-Thalfus,71.577288,5.481581e-29,69.046935,9.542821e-29,67.497252,1.355615e-28
