In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from soma import aims

In [4]:
# Load the per-subject diffusion metrics that were previously extracted to CSV,
# one table per group.
controls = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_control_group.csv', index_col=0)
preterms = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_preterm_group.csv', index_col=0)

### Bundle and metric names are parsed from the column headers, which are read
### as '<bundle>_<metric>'; the first two columns are skipped.
metric_columns = controls.columns[2:]
tract_names = np.unique(np.array([col.split('_')[0] for col in metric_columns]))
print('Number of evaluated bundles: {}'.format(len(tract_names)))
metrics = np.unique(np.array([col.partition('_')[-1] for col in metric_columns]))

### Pairing: there are fewer preterms, so their IDs drive the selection of matches.
# .copy() makes `matched` an independent frame, so the column assignments below
# write to it directly instead of to a possible view of the CSV table
# (avoids pandas' SettingWithCopyWarning).
matched = pd.read_csv('../../DerivedData/subject_matching.csv', index_col=0)
matched = matched[matched['preterm_ID'].isin(preterms['subject_id'].values)].copy()

# Sanity check: every preterm subject should have exactly one matching row.
if len(preterms) == len(matched):
    print('Number of subject pairs: {}'.format(len(preterms)))
else:
    print('Someting happened with matching')

# Age at birth is needed for the subgrouping.
ages = pd.read_csv('../../SourceData/release3_subject_info.tsv', sep='\t')

# NOTE: the lookup keys below carry a trailing space ('participant_id ',
# 'birth_age ', and ID + ' '), apparently mirroring how the TSV is formatted.
# The first matching row is used; a missing ID raises IndexError.
matched['preterm_birth_age'] = 0.
matched['control_birth_age'] = 0.
age_sources = [('preterm_birth_age', 'preterm_ID'),
               ('control_birth_age', 'matched_ID_with_outcome')]
for i, row in matched.iterrows():
    for age_col, id_col in age_sources:
        same_subject = ages['participant_id '] == row[id_col] + ' '
        matched.at[i, age_col] = ages.loc[same_subject, 'birth_age '].values[0]

### Subgrouped pairs, split on the preterm's birth age at a threshold of 32
### (presumably weeks of gestation -- confirm against the TSV).
# .loc selection followed by a non-inplace rename yields independent frames, so
# columns can be added to them later without chained-assignment warnings.
pair_columns = ['preterm_ID', 'matched_ID_with_outcome']
short_names = {'matched_ID_with_outcome': 'matched_ID'}

extreme_pairs = matched.loc[matched['preterm_birth_age'] < 32, pair_columns].rename(columns=short_names)
moderate_pairs = matched.loc[matched['preterm_birth_age'] >= 32, pair_columns].rename(columns=short_names)

# Sanity check: the two subgroups must account for every pair (a NaN birth age
# would fall into neither group and be reported here).
if (len(extreme_pairs) + len(moderate_pairs)) == len(matched):
    print('Extreme group: N = {}\nModerate group: N = {}'.format(len(extreme_pairs), len(moderate_pairs)))
else:
    print('Grouping wrong')
    
    

Number of evaluated bundles: 39
Number of subject pairs: 59
Extreme group: N = 33
Moderate group: N = 26


In [5]:
# Number of pairs in the moderate group (row count of the table).
len(moderate_pairs.index)

26

In [13]:
# For both members of every extreme pair, average each diffusion metric over the
# white-matter voxels of the subject's ribbon segmentation and store the result
# in extreme_pairs as a '<preterm|control>_<metric>' column.
# Requires `read_in_metric` to be defined already (its cell sits further down
# in this notebook, so it has to be run first).

ribbon_template = ('/neurospin/grip/external_databases/dHCP_CR_JD_2018/Projects/'
                   'rel3_dhcp_dmri_shard_pipeline_analysis/sub-{0}/ses-{1}/'
                   'sub-{0}_ses-{1}_desc-ribbon_dseg_shardDMRI_space.nii.gz')

# (column prefix, ID column in extreme_pairs, ID column in matched, session column in matched)
subject_roles = [
    ('preterm', 'preterm_ID', 'preterm_ID', 'preterm_session'),
    ('control', 'matched_ID', 'matched_ID_with_outcome', 'matched_session_with_outcome'),
]

for i, row in extreme_pairs.iterrows():

    for subj, pair_col, matched_col, session_col in subject_roles:
        # Access by column label rather than by position: integer keys on a
        # label-indexed Series are deprecated in recent pandas and break if the
        # column order ever changes.
        subj_id = row[pair_col]
        # Session of the first row in `matched` that lists this subject.
        ses_id = matched.loc[matched[matched_col] == subj_id, session_col].values[0]

        print(subj_id, ses_id)

        # Ribbon labels 41 and 2 are the ones treated as white matter here.
        wm_ar = aims.read(ribbon_template.format(subj_id, ses_id)).arraydata()[0]
        wm_mask = ((wm_ar == 41) | (wm_ar == 2)).astype(wm_ar.dtype)

        for metric in ['AD', 'RD', 'MD', 'FA', 'NDI', 'ODI']:
            m_map = read_in_metric(subj_ID=subj_id, ses_ID=ses_id, metric=metric) * wm_mask

            # Mean over the voxels left non-zero after masking.
            # NOTE(review): this also drops white-matter voxels whose metric
            # value is exactly 0 -- confirm that this is intended.
            extreme_pairs.loc[i, subj + '_' + metric] = np.mean(m_map[m_map != 0])
        
        

('CC00830XX14', 30710)
('CC00939XX24', 36230)
('CC00136AN13', 64201)
('CC00184XX12', 60501)
('CC00855XX14', 530)
('CC00589XX21', 184000)
('CC00838XX22', 30610)
('CC00100XX01', 35000)
('CC00997BN25', 56430)
('CC00852XX11', 28210)
('CC00301XX04', 113001)
('CC00427XX15', 130100)
('CC00867XX18', 8930)
('CC00586XX18', 179000)
('CC00770XX12', 1100)
('CC00716XX15', 222800)
('CC00628XX18', 193500)
('CC01198XX20', 140930)
('CC00525XX14', 165900)
('CC00111XX04', 37002)
('CC00672BN13', 214800)
('CC00383XX13', 121800)
('CC00305XX08', 115700)
('CC00477XX16', 141600)
('CC00526XX15', 169900)
('CC00865XX16', 12330)
('CC00284AN13', 111400)
('CC00364XX10', 115200)
('CC00227XX13', 92100)
('CC00424XX12', 129400)
('CC00389XX19', 133800)
('CC00353XX07', 111000)
('CC00621XX11', 195900)
('CC00500XX05', 145900)
('CC00489XX20', 142900)
('CC00122XX07', 42000)
('CC00245AN15', 94300)
('CC00663XX12', 195000)
('CC00245BN15', 94400)
('CC00527XX16', 184400)
('CC00661XX10', 209600)
('CC00537XX18', 157100)
('CC00518XX15

In [15]:
# Write the extreme-group table, with whatever columns it holds at this point, to CSV.
extreme_pairs.to_csv(path_or_buf='../../DerivedData/extreme_pairs_mean_diffusion_metrics_over_WM.csv')

In [17]:
# Re-check the number of pairs in the moderate group (row count of the table).
len(moderate_pairs.index)

26

In [2]:
def read_in_metric(subj_ID, ses_ID, metric):
    """Load one diffusion metric map for a subject/session.

    Files are read with ``aims.read`` from the subject's ``dwi`` directory of
    the dHCP SHARD pipeline output, and ``arraydata()[0]`` of each volume is
    used (presumably the single time point of a 3D map -- confirm).

    Parameters
    ----------
    subj_ID : subject identifier, inserted after ``sub-`` in the paths.
    ses_ID : session identifier, inserted after ``ses-`` in the paths.
    metric : one of
        'AD'  -- the dtifit L1 map,
        'RD'  -- (L2 + L3) / 2,
        'MD'  -- (L1 + L2 + L3) / 3,
        'FA'  -- the dtifit FA map,
        'NDI' -- NODDI ``mean_fintra`` map,
        'ODI' -- NODDI ``OD`` map.

    Returns
    -------
    The array of the requested metric.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the names above. This is checked before
        any file is read (previously an unknown name ended in an
        UnboundLocalError at the return statement).
    """
    known_metrics = ('AD', 'RD', 'MD', 'FA', 'NDI', 'ODI')
    if metric not in known_metrics:
        raise ValueError('Unknown metric {!r}; expected one of {}'.format(
            metric, ', '.join(known_metrics)))

    dwi_dir = ('/neurospin/grip/external_databases/dHCP_CR_JD_2018/release3/'
               'dhcp_dmri_shard_pipeline/sub-{}/ses-{}/dwi').format(subj_ID, ses_ID)

    def _load(relative_path):
        # Read one volume below the dwi directory and return its first sub-array.
        return aims.read(dwi_dir + '/' + relative_path).arraydata()[0]

    def _load_dti(suffix):
        # dtifit outputs are named sub-<subj>_ses-<ses>_<suffix>.nii.gz
        return _load('DTI/dtifit_b1000/sub-{}_ses-{}_{}.nii.gz'.format(
            subj_ID, ses_ID, suffix))

    if metric == 'AD':
        return _load_dti('L1')
    if metric == 'FA':
        return _load_dti('FA')
    if metric == 'RD':
        # Radial diffusivity: mean of the second and third eigenvalue maps.
        return (_load_dti('L2') + _load_dti('L3'))/2
    if metric == 'MD':
        # Mean diffusivity: mean of all three eigenvalue maps.
        return (_load_dti('L1') + _load_dti('L2') + _load_dti('L3'))/3
    if metric == 'NDI':
        return _load('NODDI/mean_fintra.nii.gz')
    # Only 'ODI' is left after the validation above.
    return _load('NODDI/OD.nii.gz')