In [1]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt 
import numpy as np
import os 

# Subject exclusion: overall

In [2]:
# Load the release-3 subject table (tab separated; its column names end with a space).
df = pd.read_csv('../../SourceData/release3_subject_info.tsv', sep='\t')

# Keep one row per participant: the row holding the smallest session_id.
# NOTE(review): this step was previously described as keeping the "higher id"
# session, which is what the commented-out idxmax variant does -- confirm intent.
#df = df.loc[df.groupby('participant_id ')['session_id '].idxmax()]
lowest_session_rows = df.groupby('participant_id ')['session_id '].idxmin()
df = df.loc[lowest_session_rows]

print('dMRI')
print('Number of single sublects: {}'.format(len(df)))

# Drop every column whose name mentions fmri, eddy or smri.
to_keep = [col for col in df.columns
           if not any(tag in col for tag in ('fmri', 'eddy', 'smri'))]
df = df[to_keep]

# Keep rows whose visual-QC failure flag equals the string 'False '.
visual_qc_ok = df['qc_dmri_shard_fail_visual_qc '].eq('False ')
df = df[visual_qc_ok]
print('Number of subjects with dMRI that passes visual QC: {}'.format(len(df)))

# Drop rows that have no SHARD SNR value.
df = df[df['qc_dmri_shard_snr '].notna()]
print('Number without otherwise missing: {}'.format(len(df)))

# Keep subjects with a radiology score below 4 (scores of 4 and above are removed).
df = df[df['radiology_score '].lt(4)]
print('Number of subjects with radiological score 0-3: {}'.format(len(df)))

## OUTCOMES
print('\nOUTCOMES')
outcomes = pd.read_csv('../../SourceData/subject_outcomes.csv')
# Discard rows with a missing value in any column other than the first.
outcomes = outcomes.dropna(subset=outcomes.columns[1:])
n_with_outcomes = len(np.unique(outcomes['Participant ID'].values))
print('Number of unique subjects with OUTCOMES: {}'.format(n_with_outcomes))

# Rename the ID column to the (space-suffixed) name used by the subject table,
# and make the cognitive score's name explicit.
outcomes = outcomes.rename(columns={'Participant ID': 'participant_id ',
                                    'Composite Score': 'Cognitive Composite Score'})

outcomes = outcomes[['participant_id ', 'Cognitive Composite Score',
                     'Communication Composite Score', 'Motor Composite Score']]

# Keep only subjects whose age at assessment lies in [17, 20] (both ends included).
a = pd.read_csv('../../SourceData/DHCPNDH1-TomArichiSaraNeumane_DATA_2021-12-16_1359.csv', low_memory=False)
a = a[['participationid','age_at_assess_m']].dropna()
a = a[a['age_at_assess_m'].between(17, 20)]
within_range = a['participationid'].values

outcomes = outcomes[outcomes['participant_id '].isin(within_range)]
n_in_range = len(np.unique(outcomes['participant_id '].values))
print('Number of unique subjects with OUTCOMES within 17-20 months range: {}'.format(n_in_range))



print('\nINTERSECTION')

# IDs in the subject table end with a space, so pad the outcome IDs before matching.
subj_ids = [subj_id + ' ' for subj_id in outcomes['participant_id '].values]

# .copy() makes the filtered frame independent of the one it was cut from, so the
# column assignment below does not write into a slice (no SettingWithCopyWarning).
df = df[df['participant_id '].isin(subj_ids)].copy()
# NOTE(review): the label says "1-3", but the radiology filter applied earlier
# keeps every score below 4 and is reported elsewhere as "0-3".
print('Number of subjects with dMRI & radiological score 1-3 & outcome: {}'.format(len(df)))

# Remove the spaces from the IDs (literal replacement, not a regex) so they
# match the outcome IDs, then attach the outcome scores.
df['participant_id '] = df['participant_id '].str.replace(' ', '', regex=False)
df = pd.merge(df, outcomes, on='participant_id ', how='inner')

df['birth_age '] = df['birth_age '].astype(np.float32)
df['scan_age '] = df['scan_age '].astype(np.float32)

# group: 'PT' when birth_age is below 37, otherwise 'FT'.
df['group'] = 'FT'
df.loc[df['birth_age '] < 37, 'group'] = 'PT'

# template: 39 by default, 36 when scan_age < 37.5, 33 when scan_age < 34.5.
# The order matters: the last matching assignment wins.
df['template'] = 39
df.loc[df['scan_age '] < 37.5, 'template'] = 36
df.loc[df['scan_age '] < 34.5, 'template'] = 33


dMRI
Number of single sublects: 783
Number of subjects with dMRI that passes visual QC: 673
Number without otherwise missing: 658
Number of subjects with radiological score 0-3: 591

OUTCOMES
Number of unique subjects with OUTCOMES: 726
Number of unique subjects with OUTCOMES within 17-20 months range: 576

INTERSECTION
Number of subjects with dMRI & radiological score 1-3 & outcome: 378


### Quantify number of subjects per template:

In [3]:
# Non-null participant IDs per template.
df.groupby('template')['participant_id '].count()

template
33     18
36     42
39    318
Name: participant_id , dtype: int64

In [4]:
## write subject, id files for DTI etc 
#f = '../../DerivedData/replication_cohort_all_subjects_new.csv'
#if os.path.exists(f):
#    os.remove(f)
    
#df2 = df[['participant_id ', 'session_id ', 'template']]
#df2.to_csv(f, index=False, header=False)

**There are 8 subjects whose anatomical (anat) data are missing in the dHCP.**<br>
**Remove them before plotting, as they will not appear in Cohort A.**

In [5]:
# The list file has no header row, so the column names are supplied here.
final_sub_list = pd.read_csv(
    '../../DerivedData/replication_cohort_all_subjects_current.csv',
    names=['subject_id', 'session_id', 'template'],
)
# Keep only the subjects that appear in that list, then recount per template.
in_final_list = df['participant_id '].isin(final_sub_list['subject_id'].values)
df = df[in_final_list]
df.groupby('template')['participant_id '].count()

template
33     18
36     41
39    311
Name: participant_id , dtype: int64

In [10]:
### Sort by PMA at scan: the 3 youngest and 3 oldest subjects, wanted for
### registration QC, are read off the two ends of the displayed table.
df.sort_values(by='scan_age ')

Unnamed: 0,participant_id,session_id,scan_number,singleton,sedation,birth_age,scan_age,sex,birth_weight,head_circumference_scan,...,qc_dmri_shard_rotation,qc_dmri_shard_outlier_ratio,qc_dmri_shard_fail_visual_qc,qc_dmri_shard_comments,qc_dmri_shard_comments_recon,Cognitive Composite Score,Communication Composite Score,Motor Composite Score,group,template
282,CC00657XX14,193700,1,S,False,28.142857,29.860001,male,1.255,28.0,...,0.194331,0.153525,False,,,80.0,86.0,91.0,PT,33
272,CC00618XX16,177201,1,S,False,27.428572,29.860001,female,0.760,24.4,...,0.091927,0.058931,False,,,85.0,112.0,100.0,PT,33
158,CC00389XX19,119100,1,S,False,28.714285,29.860001,male,0.825,25.5,...,0.271315,0.188976,False,,,95.0,94.0,91.0,PT,33
323,CC00830XX14,18910,1,S,False,30.428572,31.139999,female,1.350,28.0,...,0.097038,0.234562,False,,,100.0,91.0,94.0,PT,33
111,CC00284BN13,90801,1,M,False,30.714285,32.290001,male,1.050,25.5,...,0.074730,0.108171,False,,,100.0,115.0,97.0,PT,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,CC00286XX15,91700,1,S,True,40.857143,44.430000,male,4.000,38.5,...,0.073637,0.213666,False,,,100.0,79.0,97.0,FT,39
74,CC00184XX12,60501,1,S,False,41.285713,44.709999,female,3.700,36.5,...,0.184540,0.140481,False,,,100.0,91.0,97.0,FT,39
369,CC01145XX16,98330,1,S,False,37.714287,44.709999,female,2.470,35.8,...,0.337133,0.234539,False,,,105.0,97.0,115.0,FT,39
77,CC00186BN14,61000,1,M,False,36.428570,45.139999,female,1.530,36.0,...,0.258819,0.303875,False,,,110.0,83.0,100.0,PT,39


# Subject exclusion:  clinical information

In [11]:
#!pip install openpyxl 

# Header-less subject list: subject, session, template.
cohort_columns = ['subject_id', 'session_id', 'template']
df = pd.read_csv('../../DerivedData/replication_cohort_all_subjects_current.csv', names=cohort_columns)
# Drop the subjects listed in the failed-metric QC file.
bad = pd.read_csv('../../DerivedData/failed_metric_QC.csv', names=['subject_id'])
failed_metric_qc = df['subject_id'].isin(bad['subject_id'].values)
df = df[~failed_metric_qc]

## get clinical data for descriptive tables 

In [12]:
# Seed the clinical table with the three identifying columns of the cohort list.
# Taking the columns in one step (instead of filling an empty frame one cell at
# a time) keeps each column's dtype and keeps df's row labels, which the cells
# below rely on when they write clinical.loc[i, ...] with i from df.iterrows().
clinical = df[['subject_id', 'session_id', 'template']].copy()

clinical['session_id'] = clinical['session_id'].astype(int)

In [13]:
info = pd.read_csv('../../SourceData/release3_subject_info.tsv', sep='\t')

# clinical column -> source column in the subject table (source names end with a space).
info_columns = {
    'PMA_scan': 'scan_age ',
    'GA_birth': 'birth_age ',
    'Sex': 'sex ',
    'Weight_birth': 'birth_weight ',
    'Radiology_score': 'radiology_score ',
}

for i, row in df.iterrows():
    # Select the rows of this subject/session once and reuse them for every
    # field, instead of re-filtering the whole table per field. IDs in the
    # subject table end with a space, hence the padding.
    session_rows = info[(info['participant_id '] == row.subject_id + ' ')
                        & (info['session_id '] == row.session_id)]
    for target, source in info_columns.items():
        # .values[0] raises IndexError when the subject/session is not found.
        clinical.loc[i, target] = session_rows[source].values[0]

##### for the other ones I will need the clinical data file from Sara/Tom

In [14]:
# Read the descriptive data and discard the columns that are completely empty.
info = (
    pd.read_csv('../../SourceData/descriptive_data_labels.csv', low_memory=False)
    .dropna(axis='columns', how='all')
)
# Restrict to the participants of the current cohort list.
cohort_ids = df['subject_id'].values
info = info[info['Participant ID'].isin(cohort_ids)]

In [15]:
## race
# Only records with both parents' ethnicity filled in are considered.
ethnicity = info.dropna(subset=["Mother's ethnicity","Father's ethnicity"])
ethnicity = ethnicity[['Participant ID', 'Event Name', "Mother's ethnicity","Father's ethnicity"]]

for i,row in df.iterrows():
    # First such record of the subject (IndexError if there is none).
    subject_rows = ethnicity[ethnicity['Participant ID'] == row.subject_id]
    mother = subject_rows["Mother's ethnicity"].values[0]
    father = subject_rows["Father's ethnicity"].values[0]

    # Keep only the text before the first '-', without trailing whitespace.
    clinical.loc[i, 'ethnicity_mother'] = mother.split('-')[0].rstrip()
    clinical.loc[i, 'ethnicity_father'] = father.split('-')[0].rstrip()
    
    #if mother == father:
    #    clinical.loc[i, 'Race'] = mother
        
    #else: 
    #    clinical.loc[i, 'Race'] = 'Other'
    
## mode of delivery

# Delivery method as recorded on the 'Baby Born' event.
delivery_info = info[['Participant ID', 'Event Name', "Baby delivery method","Mode of delivery"]]
delivery_info = delivery_info[delivery_info['Event Name'] == 'Baby Born']

# Raw label -> short code. Every label not listed here is coded 'V'.
# Using a single lookup fixes the previous if / if-elif-else layout, in which
# the elective-caesarean code 'C_el' was always overwritten by the final 'V'.
# NOTE(review): labels are matched verbatim; 'Ventous' may be a misspelling of
# 'Ventouse' -- confirm against the values in the data file, otherwise those
# deliveries fall through to 'V'.
delivery_codes = {
    'Elective caesarean section': 'C_el',
    'Emergency caesarean section - in labour': 'C_em',
    'Emergency caesarean section - not in labour': 'C_em',
    'Instrumental delivery - Forceps': 'I',
    'Instrumental delivery - Ventous': 'I',
}

for i,row in df.iterrows():
    # First 'Baby Born' record of the subject (IndexError if there is none).
    delivery = delivery_info[delivery_info['Participant ID'] == row.subject_id]["Baby delivery method"].values[0]
    clinical.loc[i, 'Delivery'] = delivery_codes.get(delivery, 'V')

##### exclusion criteria 

In [16]:
## radiology score - from subjects tsv 

## drug/alcohol abuse
cols = ['Participant ID','Does the mother currently smoke?', 'Does the mother currently drink alcohol']
# Only records where both questions are answered are used.
ethnicity = info[cols].dropna()

for i,row in df.iterrows():
    answers = ethnicity[ethnicity['Participant ID'] == row.subject_id]

    # Any answer other than 'Yes' is recorded as 'No'.
    smoke = 'Yes' if answers['Does the mother currently smoke?'].values[0] == 'Yes' else 'No'
    # NOTE(review): 'Current_smore' looks like a typo for 'Current_smoke'; the key is
    # kept unchanged because it becomes a column name of the clinical table.
    clinical.loc[i, 'Current_smore'] = smoke

    alcohol = 'Yes' if answers['Does the mother currently drink alcohol'].values[0] == 'Yes' else 'No'
    clinical.loc[i, 'Current_alcohol'] = alcohol

    # Combined flag: 'Yes' as soon as either answer is 'Yes'.
    either = 'Yes' if (smoke == 'Yes' or alcohol == 'Yes') else 'No'
    clinical.loc[i, 'Current_smoke_alcohol'] = either
            
## hypoxic–ischemic encephalopathy
# NOTE(review): HIE is set to 'No' for every subject here; no data column is
# consulted, so any later count of HIE == 'Yes' is 0 by construction.
for i in df.index:
    clinical.loc[i, 'HIE'] = 'No'


## lung disease, or bronchopulmonary dysplasia

oxygen_col = 'Total number of days when baby recorded as receiving any oxygen by any means'
ethnicity = info[['Participant ID', oxygen_col]].dropna()
for i, row in df.iterrows():

    oxygen = ethnicity[ethnicity['Participant ID'] == row.subject_id][oxygen_col].values

    # Use the first recorded value; a subject without any record counts as 0 days.
    oxygen = oxygen[0] if len(oxygen) > 0 else 0.
    # NOTE(review): values above 90 are reset to 0, so such subjects end up with
    # lung_disease == 'No' -- presumably these are treated as invalid entries; confirm.
    if oxygen > 90:
        oxygen = 0

    clinical.loc[i, 'oxygen_days'] = oxygen
    # Any positive number of oxygen days is flagged as lung disease.
    clinical.loc[i, 'lung_disease'] = 'Yes' if oxygen > 0 else 'No'

## necrotising enterocolitis
nec_col = 'Necrotising enterocolitis was suspected or confirmed on any day in this episode. '
ethnicity = info[['Participant ID', nec_col]].dropna()
for i, row in df.iterrows():
    ni = ethnicity[ethnicity['Participant ID'] == row.subject_id][nec_col].values

    # 'No' when the subject has no record or the first record is the text 'None';
    # any other first value counts as 'Yes'.
    ni = 'No' if (len(ni) == 0 or ni[0] == 'None') else 'Yes'

    clinical.loc[i, 'Necrotising_enterocolitis'] = ni

#### add the outcome scores 

In [17]:
## OUTCOMES
outcomes = pd.read_csv('../../SourceData/subject_outcomes.csv')
# Discard rows with a missing value in any column other than the first.
outcomes = outcomes.dropna(subset=outcomes.columns[1:])
outcomes = outcomes.rename(columns={'Participant ID': 'participant_id ',
                                    'Composite Score': 'Cognitive Composite Score'})

outcomes = outcomes[['participant_id ', 'Cognitive Composite Score',
                     'Communication Composite Score', 'Motor Composite Score']]

# clinical column -> outcome column
score_columns = {
    'Cognitive Score': 'Cognitive Composite Score',
    'Language Score': 'Communication Composite Score',
    'Motor Score': 'Motor Composite Score',
}

for i, row in clinical.iterrows():
    # First outcome record of the subject (IndexError if there is none).
    subject_scores = outcomes[outcomes['participant_id '] == row.subject_id]
    for target, source in score_columns.items():
        clinical.loc[i, target] = subject_scores[source].values[0]

### if we use exclusion criteria: 

In [20]:
# A subject is excluded as soon as one of these columns holds anything but 'No'.
criteria = ['Current_smoke_alcohol', 'HIE', 'lung_disease', 'Necrotising_enterocolitis']
flagged = clinical[criteria].ne('No').any(axis=1)
exclude = clinical.loc[flagged, 'subject_id'].tolist()

In [21]:
# Rows of the clinical table that belong to excluded subjects.
ex = clinical.loc[clinical['subject_id'].isin(exclude)]

In [22]:
# Number of excluded subjects flagged 'Yes' per criterion; a subject can be
# counted under several criteria.
for label, column in [('Current smoke/alcohol:', 'Current_smoke_alcohol'),
                      ('HIE:', 'HIE'),
                      ('Lung Disease:', 'lung_disease'),
                      ('Necrotising enterocolitis:', 'Necrotising_enterocolitis')]:
    print(label, len(ex[ex[column] == 'Yes']))

print('Combined to remove:', len(ex))

Current smoke/alcohol: 37
HIE: 0
Lung Disease: 29
Necrotising enterocolitis: 2
Combined to remove: 63


In [23]:
#clinical.to_csv('../../DerivedData/cohortA_subjects_clinical.csv')

# Cohort creation 

In [24]:
# Reload the clinical table from disk; the first CSV column becomes the index.
# NOTE(review): the to_csv call that writes this file is commented out, so the
# file has to exist already for this cell to run.
clinical_csv = '../../DerivedData/cohortA_subjects_clinical.csv'
df = pd.read_csv(clinical_csv, index_col=0)

In [25]:
## add age at assessment
## (this cell only adds the Age_BSID column; it does not remove any subject)
a = pd.read_csv('../../SourceData/DHCPNDH1-TomArichiSaraNeumane_DATA_2021-12-16_1359.csv', low_memory=False)
a = a[['participationid','age_at_assess_m']].dropna()

for i, row in df.iterrows():
    # First non-missing age recorded for the subject (IndexError if there is none).
    subject_ages = a.loc[a['participationid'] == row.subject_id, 'age_at_assess_m']
    df.loc[i, 'Age_BSID'] = subject_ages.values[0]

In [26]:
### USE EXCLUSION CRITERIA
# Exclude a subject when any of the four criteria columns is not 'No'.
criteria = ['Current_smoke_alcohol', 'HIE', 'lung_disease', 'Necrotising_enterocolitis']
flagged = (df[criteria] != 'No').any(axis=1)
exclude = df.loc[flagged, 'subject_id'].tolist()

ex = df[df['subject_id'].isin(exclude)]

In [27]:
# NOTE(review): the value printed on the first line is len(df), i.e. the number
# of subjects in the table at this point, not a number of exclusions.
print('Initial exclusions based on QC etc: ', len(df))
print('\n')

# Number of excluded subjects flagged 'Yes' per criterion.
for label, column in [('Current smoke/alcohol:', 'Current_smoke_alcohol'),
                      ('HIE:', 'HIE'),
                      ('Lung Disease:', 'lung_disease'),
                      ('Necrotising enterocolitis:', 'Necrotising_enterocolitis')]:
    print(label, len(ex[ex[column] == 'Yes']))

print('Combined to remove:', len(ex))

### additionally, if we want to keep only radiology 2 and lower 
#print('\n')
#print('Subjects with radiology 3:', len(df[df.Radiology_score > 2]))

#exclude.extend([subj for subj in df[df.Radiology_score > 2].subject_id.values])
#print('Potential final to remove:', len(df[df.subject_id.isin(exclude)]) )

Initial exclusions based on QC etc:  358


Current smoke/alcohol: 37
HIE: 0
Lung Disease: 29
Necrotising enterocolitis: 2
Combined to remove: 63


### excluding alcohol/smoke use, HIE, lung disease, necrotising enterocolitis 

In [29]:
### Apply exclusion criteria: keep the subjects that are not in the exclude list
not_excluded = ~df['subject_id'].isin(exclude)
df = df[not_excluded]
print('Final number of subjects: ', len(df))

Final number of subjects:  295


In [30]:
# race: the shared parental ethnicity when both parents match, 'Mixed' otherwise
same_ethnicity = df['ethnicity_mother'] == df['ethnicity_father']
df['race'] = df['ethnicity_mother'].where(same_ethnicity, 'Mixed')

### Creating the cohorts

In [31]:
# Cohort label -> array of subject ids; filled in by the cohort cells below.
cohorts = dict()

### Cohort A 
All available subjects that remain after applying the baseline exclusion criteria.

In [33]:
print('New cohort A (baseline):', len(df))
print('\n')

# One line per variable: mean, std (numpy default, ddof=0), median, [min, max].
summary_line = '{}: mean {:.3f} ({:.3f} std); median {:.3f}; [{:.3f}, {:.3f}]'
for col in ['PMA_scan', 'GA_birth', 'Age_BSID','Weight_birth', 'Cognitive Score', 'Language Score', 'Motor Score']:
    values = df[col]
    print(summary_line.format(col, np.mean(values), np.std(values), np.median(values),
                              np.min(values), np.max(values)))

# The Sex values carry a trailing space ('male ').
males = int((df['Sex'] == 'male ').sum())
white = int((df['race'] == 'White').sum())
print('Male: {} ({:.3f}%)'.format(males, (males*100)/len(df) ))
print('White: {} ({:.3f}%)'.format(white, (white*100)/len(df) ))

delivery_counts = [len(df[df['Delivery'] == code]) for code in ('V', 'I', 'C_em', 'C_el')]
print('Mode of delivery: V {}, I {}, Cem {}, Cel {}'.format(*delivery_counts))

cohorts['A'] = df['subject_id'].values

New cohort A (baseline): 295


PMA_scan: mean 40.428 (2.446 std); median 40.860; [31.140, 45.140]
GA_birth: mean 39.148 (2.361 std); median 39.857; [29.857, 42.286]
Age_BSID: mean 18.173 (0.860 std); median 18.000; [17.000, 20.000]
Weight_birth: mean 3.177 (0.689 std); median 3.295; [0.760, 4.610]
Cognitive Score: mean 101.068 (11.175 std); median 100.000; [60.000, 130.000]
Language Score: mean 97.044 (16.046 std); median 97.000; [47.000, 153.000]
Motor Score: mean 101.654 (9.481 std); median 103.000; [70.000, 127.000]
Male: 158 (53.559%)
White: 158 (53.559%)
Mode of delivery: V 155, I 67, Cem 73, Cel 0


### cohort B 
Same as Cohort A, but additionally restricted to a range of ages at scan (PMA) chosen to be similar to the eLife paper.

In [34]:
### cohort B
# Keep subjects whose PMA at scan lies in [31.9, 41.7] (both ends included).
df = df[df['PMA_scan'].between(31.9, 41.7)]

print('Cohort B:', len(df))
print('\n')

# One line per variable: mean, std (numpy default, ddof=0), median, [min, max].
summary_line = '{}: mean {:.3f} ({:.3f} std); median {:.3f}; [{:.3f}, {:.3f}]'
for col in ['PMA_scan', 'GA_birth', 'Weight_birth', 'Cognitive Score', 'Language Score', 'Motor Score']:
    values = df[col]
    print(summary_line.format(col, np.mean(values), np.std(values), np.median(values),
                              np.min(values), np.max(values)))

males = int((df['Sex'] == 'male ').sum())
white = int((df['race'] == 'White').sum())
print('Male: {} ({:.3f}%)'.format(males, (males*100)/len(df) ))
print('White: {} ({:.3f}%)'.format(white, (white*100)/len(df) ))

delivery_counts = [len(df[df['Delivery'] == code]) for code in ('V', 'I', 'C_em', 'C_el')]
print('Mode of delivery: V {}, I {}, Cem {}, Cel {}'.format(*delivery_counts))

cohorts['B'] = df['subject_id'].values

Cohort B: 198


PMA_scan: mean 39.310 (2.016 std); median 39.930; [32.290, 41.570]
GA_birth: mean 38.530 (2.422 std); median 39.143; [29.857, 41.429]
Weight_birth: mean 3.037 (0.703 std); median 3.110; [0.760, 4.590]
Cognitive Score: mean 99.672 (11.349 std); median 100.000; [60.000, 130.000]
Language Score: mean 95.227 (16.412 std); median 97.000; [47.000, 153.000]
Motor Score: mean 100.949 (9.788 std); median 100.000; [70.000, 127.000]
Male: 111 (56.061%)
White: 94 (47.475%)
Mode of delivery: V 103, I 41, Cem 54, Cel 0


### cohort C
Same as Cohort B, but additionally restricted to a range of ages at birth (GA).

In [35]:
### cohort C
# Keep subjects whose GA at birth lies in [26, 41.4] (both ends included).
df = df[df['GA_birth'].between(26, 41.4)]

print('Cohort C:', len(df))
print('\n')

# One line per variable: mean, std (numpy default, ddof=0), median, [min, max].
summary_line = '{}: mean {:.3f} ({:.3f} std); median {:.3f}; [{:.3f}, {:.3f}]'
for col in ['PMA_scan', 'GA_birth', 'Weight_birth', 'Cognitive Score', 'Language Score', 'Motor Score']:
    values = df[col]
    print(summary_line.format(col, np.mean(values), np.std(values), np.median(values),
                              np.min(values), np.max(values)))

males = int((df['Sex'] == 'male ').sum())
white = int((df['race'] == 'White').sum())
print('Male: {} ({:.3f}%)'.format(males, (males*100)/len(df) ))
print('White: {} ({:.3f}%)'.format(white, (white*100)/len(df) ))

delivery_counts = [len(df[df['Delivery'] == code]) for code in ('V', 'I', 'C_em', 'C_el')]
print('Mode of delivery: V {}, I {}, Cem {}, Cel {}'.format(*delivery_counts))

cohorts['C'] = df['subject_id'].values

Cohort C: 196


PMA_scan: mean 39.287 (2.014 std); median 39.860; [32.290, 41.570]
GA_birth: mean 38.501 (2.417 std); median 39.143; [29.857, 41.286]
Weight_birth: mean 3.030 (0.700 std); median 3.110; [0.760, 4.590]
Cognitive Score: mean 99.643 (11.401 std); median 100.000; [60.000, 130.000]
Language Score: mean 95.434 (16.215 std); median 97.000; [47.000, 153.000]
Motor Score: mean 100.847 (9.784 std); median 100.000; [70.000, 127.000]
Male: 111 (56.633%)
White: 93 (47.449%)
Mode of delivery: V 102, I 40, Cem 54, Cel 0


### Cohort D 
Same as Cohort C, but additionally restricted to fixed ranges of the cognitive, language and motor scores.

In [36]:
# Keep subjects whose three scores all fall inside these inclusive ranges.
score_ranges = {
    'Cognitive Score': (65, 110),
    'Language Score': (56, 112),
    'Motor Score': (73, 107),
}
for score, (low, high) in score_ranges.items():
    df = df[df[score].between(low, high)]

print('Cohort D:', len(df))
print('\n')

# One line per variable: mean, std (numpy default, ddof=0), median, [min, max].
summary_line = '{}: mean {:.3f} ({:.3f} std); median {:.3f}; [{:.3f}, {:.3f}]'
for col in ['PMA_scan', 'GA_birth', 'Weight_birth', 'Cognitive Score', 'Language Score', 'Motor Score']:
    values = df[col]
    print(summary_line.format(col, np.mean(values), np.std(values), np.median(values),
                              np.min(values), np.max(values)))

males = int((df['Sex'] == 'male ').sum())
white = int((df['race'] == 'White').sum())
print('Male: {} ({:.3f}%)'.format(males, (males*100)/len(df) ))
print('White: {} ({:.3f}%)'.format(white, (white*100)/len(df) ))

delivery_counts = [len(df[df['Delivery'] == code]) for code in ('V', 'I', 'C_em', 'C_el')]
print('Mode of delivery: V {}, I {}, Cem {}, Cel {}'.format(*delivery_counts))

cohorts['D'] = df['subject_id'].values

Cohort D: 126


PMA_scan: mean 39.460 (1.793 std); median 40.000; [32.710, 41.570]
GA_birth: mean 38.653 (2.172 std); median 39.143; [29.857, 41.286]
Weight_birth: mean 3.086 (0.646 std); median 3.175; [1.250, 4.590]
Cognitive Score: mean 95.675 (9.136 std); median 95.000; [65.000, 110.000]
Language Score: mean 90.873 (12.806 std); median 91.000; [59.000, 112.000]
Motor Score: mean 97.198 (7.980 std); median 98.500; [73.000, 107.000]
Male: 73 (57.937%)
White: 52 (41.270%)
Mode of delivery: V 60, I 32, Cem 34, Cel 0


### save subject ids for cohorts 

In [37]:
#with open(r"../../DerivedData/cohorts_subjects_list.pickle", "wb") as output_file:
#    pickle.dump(cohorts, output_file)
   