In [1]:
import pandas as pd 
import numpy as np 
import pingouin as pg 
import seaborn as sns
import matplotlib.pyplot as plt


In [6]:
# Load the per-group diffusion metric tables (CSV, first column used as the index).
preterms = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_preterm_group.csv', index_col=0)
controls = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_control_group.csv', index_col=0)

# Every column from the third one onwards is parsed as '<bundle>_<metric>':
# the text before the first underscore is the bundle name, the rest is the metric.
metric_columns = controls.columns[2:]
tract_names = np.unique([column.split('_')[0] for column in metric_columns])
print('Number of evaluated bundles: {}'.format(tract_names.size))
metrics = np.unique([column.partition('_')[-1] for column in metric_columns])

# Build the pairing: keep only the matching rows whose preterm subject is
# present in the preterm table ...
matched = pd.read_csv('../../DerivedData/subject_matching.csv', index_col=0)
preterm_is_available = matched['preterm_ID'].isin(preterms['subject_id'].values)
matched = matched[preterm_is_available]

# ... and keep only the controls that are matched to one of those preterms.
matched_control_ids = matched.matched_ID_with_outcome.values
controls = controls[controls.subject_id.isin(matched_control_ids)]

# Stack both groups into the single frame used by the analyses below.
df = pd.concat([preterms, controls])

Number of evaluated bundles: 39


### Perform paired t-tests, corrected for multiple comparisons, to determine whether left-right (LR) asymmetries exist per bundle/metric

In [11]:
# Left/right homologous bundle pairs compared in the asymmetry analysis.
# Each entry is [left_bundle, right_bundle]; the names are the column prefixes
# that get combined with '_<metric>' to select the columns to test.
# Every pair must be listed exactly once: a repeated pair would run the same
# test twice and inflate the number of comparisons used for the p-value
# corrections (the original list contained S1-Paracentral twice).
pairs = [
    ['M1L-Brainstem', 'M1R-Brainstem'],
    ['M1L-CaudL', 'M1R-CaudR'],
    ['M1L-LentiL', 'M1R-LentiR'],
    ['M1L-ParacentralL', 'M1R-ParacentralR'],
    ['M1L-SubthalL', 'M1R-SubthalR'],
    ['M1L-ThalfusL', 'M1R-ThalfusR'],
    ['ParacentralL-Brainstem', 'ParacentralR-Brainstem'],
    ['ParacentralL-CaudL', 'ParacentralR-CaudR'],
    ['ParacentralL-LentiL', 'ParacentralR-LentiR'],
    ['ParacentralL-SubthalL', 'ParacentralR-SubthalR'],
    ['ParacentralL-ThalfusL', 'ParacentralR-ThalfusR'],
    ['S1L-Brainstem', 'S1R-Brainstem'],
    ['S1L-CaudL', 'S1R-CaudR'],
    ['S1L-LentiL', 'S1R-LentiR'],
    ['S1L-ParacentralL', 'S1R-ParacentralR'],
    ['S1L-SubthalL', 'S1R-SubthalR'],
    ['S1L-ThalfusL', 'S1R-ThalfusR'],
    ['S1L-M1L', 'S1R-M1R'],
]


In [12]:
# Paired t-test between the two bundles of every pair, for every metric.
# The one-row result frames returned by pg.ttest are collected in a list and
# concatenated once: DataFrame.append was removed in pandas 2.0, and re-copying
# the growing frame on every iteration is quadratic.
test_rows = []

for metric in metrics:
    for pair in pairs:

        col1 = pair[0]+'_'+metric
        col2 = pair[1]+'_'+metric
        region_1_values = df[col1].values
        region_2_values = df[col2].values

        test = pg.ttest(x=region_1_values, y=region_2_values, paired=True)

        test['region1'] = pair[0]
        test['region2'] = pair[1]
        test['metric'] = metric

        # Descriptive statistics over all rows of df; np.std is called with
        # its default settings (ddof=0).
        test['region_1_mean'] = np.mean(region_1_values)
        test['region_1_std'] = np.std(region_1_values)

        test['region_2_mean'] = np.mean(region_2_values)
        test['region_2_std'] = np.std(region_2_values)

        test_rows.append(test)

as_results = pd.concat(test_rows)

# Correct the UNROUNDED p-values. Rounding first (as was done previously)
# turns every p < 5e-7 into exactly 0 and distorts the corrected values.
raw_pvals = as_results['p-val'].to_numpy(dtype=np.float64)

corrected_columns = {
    'fdr_bh': 'p-val_fdr_corrected',
    'bonf': 'p-val_bonf_corrected',
    'holm': 'p-val_holm_corrected',
}
for method, column in corrected_columns.items():
    reject, pvals_corr = pg.multicomp(raw_pvals, method=method)
    as_results[column] = pvals_corr

# Only the reported uncorrected p-value is rounded (6 decimals), for readability.
as_results['p-val'] = np.round(raw_pvals, 6)


In [13]:
# Lift the row limit pandas applies when displaying a frame.
pd.set_option('display.max_rows', None)

# Write the left/right asymmetry statistics table to the results folder.
asymmetry_stats_csv = '../../Results/LR_diffusion_metric_asymmetries_stats.csv'
as_results.to_csv(asymmetry_stats_csv)


In [10]:
## Sanity check: compare each bundle's NODDI metrics before and after processing.

# Column suffixes, listed as [post, pre]; defined once because they do not
# depend on the bundle being processed.
NDI = ['NDI_post', 'NDI_pre']
ODI = ['ODI_post', 'ODI_pre']

# One-row pg.ttest results are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop is quadratic.
noddi_rows = []

for tract in tract_names:
    for post_suffix, pre_suffix in (NDI, ODI):
        col1 = tract+'_'+pre_suffix
        col2 = tract+'_'+post_suffix
        pre_values = df[col1].values
        post_values = df[col2].values

        test = pg.ttest(x=pre_values, y=post_values, paired=True)

        # Here 'region1'/'region2' hold the full column names (pre / post).
        test['region1'] = col1
        test['region2'] = col2

        # np.std is called with its default settings (ddof=0).
        test['region_1_mean'] = np.mean(pre_values)
        test['region_1_std'] = np.std(pre_values)

        test['region_2_mean'] = np.mean(post_values)
        test['region_2_std'] = np.std(post_values)

        noddi_rows.append(test)

NODDI_results = pd.concat(noddi_rows)

# Correct the UNROUNDED p-values. Rounding first (as was done previously)
# turns every p < 5e-7 into exactly 0 and distorts the corrected values.
raw_pvals = NODDI_results['p-val'].to_numpy(dtype=np.float64)

corrected_columns = {
    'fdr_bh': 'p-val_fdr_corrected',
    'bonf': 'p-val_bonf_corrected',
    'holm': 'p-val_holm_corrected',
}
for method, column in corrected_columns.items():
    reject, pvals_corr = pg.multicomp(raw_pvals, method=method)
    NODDI_results[column] = pvals_corr

# Only the reported uncorrected p-value is rounded (6 decimals), for readability.
NODDI_results['p-val'] = np.round(raw_pvals, 6)

NODDI_results.to_csv('../../Results/comparison_of_bundle_NODDI_metrics_pre_post_processing.csv')