In [83]:
import pandas as pd
import os
import shutil
from tqdm import tqdm
import numpy as np

In [121]:
def safe_mkdir(fn):
    """Create the single directory ``fn`` unless something already exists there.

    Only the last path component is created (``os.mkdir`` semantics), so the
    parent directory must already exist.

    Parameters
    ----------
    fn : str or path-like
        Directory to create.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If the parent directory of ``fn`` does not exist.
    """
    # Any existing entry at this path (directory or file) makes the call a no-op.
    if os.path.exists(fn):
        return
    try:
        os.mkdir(fn)
    except FileExistsError:
        # Another process created the path between the check and the mkdir
        # call; the directory is there, which is all the caller asked for.
        pass
        
def safe_mkdir_subdirs(fn):
    """Create every leading directory of the '/'-separated path ``fn``.

    The path is split on '/' and each proper prefix is passed to
    ``safe_mkdir``, shortest first. The final component is never created:
    ``'a/b/c'`` creates ``a`` and ``a/b``, while ``'a/b/c/'`` (trailing
    slash) creates all three.

    Parameters
    ----------
    fn : str
        Path using '/' as the separator.

    Returns
    -------
    None
    """
    parts = fn.split('/')
    for depth in range(1, len(parts)):
        prefix = '/'.join(parts[:depth])
        # Skip prefixes that name no new directory: the empty string or bare
        # slashes in front of an absolute path, and the '.' marker. Testing
        # for these explicitly (rather than by length) keeps single-character
        # directory names such as 'a' from being skipped.
        if prefix.strip('/') in ('', '.'):
            continue
        safe_mkdir(prefix)

In [9]:
# Read the non-ADHD and ADHD subject-ID tables, then preview the first five ADHD subject keys
df_td = pd.read_csv('./Derivatives/ABCD_NON_ADHD_subjectIDs_N_4021.csv')
df_adhd = pd.read_csv('./Derivatives/ABCD_ADHD_subjectIDs_N_1017.csv')
df_adhd['subjectkey'][:5]

0    NDAR_INV7JERJG3Z
1    NDAR_INVD505AZ4G
2    NDAR_INV2LD0E2FB
3    NDAR_INV6EJF2KCZ
4    NDAR_INV2K2KV0JW
Name: subjectkey, dtype: object

In [18]:
def _subject_folder_name(subjectkey):
    """Return 'sub-' followed by ``subjectkey`` with every underscore removed."""
    return 'sub-' + subjectkey.replace('_', '')


# Same conversion for both groups, one folder-style name per subject key
names_adhd = list(map(_subject_folder_name, df_adhd['subjectkey'].values))
names_td = list(map(_subject_folder_name, df_td['subjectkey'].values))
names_adhd[:5]

['sub-NDARINV7JERJG3Z',
 'sub-NDARINVD505AZ4G',
 'sub-NDARINV2LD0E2FB',
 'sub-NDARINV6EJF2KCZ',
 'sub-NDARINV2K2KV0JW']

In [12]:
# Source dataset root and the sorted names of its 'sub-' entries
anat_dir = './Data/ABCDdata/'
anat_subs = sorted(entry for entry in os.listdir(anat_dir) if entry.startswith('sub-'))
anat_subs[:5]

['sub-NDARINV005V6D2C',
 'sub-NDARINV007W6H7B',
 'sub-NDARINV00BD7VDC',
 'sub-NDARINV00CY2MDM',
 'sub-NDARINV00HEV6HB']

In [21]:
# Sanity check: every entry in anat_subs occurs exactly once.
# A plain set is used because passing a Python list to pd.unique is
# deprecated in recent pandas releases; the comparison result is the same.
len(set(anat_subs)) == len(anat_subs)

True

In [42]:
# Boolean flag per ADHD subject name: True when the name is among anat_subs
adhd_ids = pd.DataFrame(names_adhd, columns=['subID'])
check_adhd = adhd_ids['subID'].isin(anat_subs).values
# Report the share and count of flagged subjects
print(f'ADHD has anats: {check_adhd.mean()*100:.2f}% | {check_adhd.sum()}/{check_adhd.shape[0]}')

ADHD has anats: 97.84% | 995/1017


In [46]:
# Boolean flag per TD subject name: True when the name is among anat_subs
td_ids = pd.DataFrame(names_td, columns=['subID'])
check_td = td_ids['subID'].isin(anat_subs).values
# Report the share and count of flagged subjects
print(f'TD has anats: {check_td.mean()*100:.2f}% | {check_td.sum()}/{check_td.shape[0]}')

TD has anats: 98.91% | 3977/4021


In [55]:
# Root directory that receives the copied subject folders
new_dir = './Data/ABCD-data-use/'

In [62]:
## Copy ADHD subjects
for sub in tqdm(names_adhd):
    # Subjects without a folder in the source listing cannot be copied
    if sub not in anat_subs:
        continue
    src_path = os.path.join(anat_dir, sub)
    dst_path = os.path.join(new_dir, sub)
    # Leave an already-existing destination untouched
    if os.path.exists(dst_path):
        continue
    shutil.copytree(src=src_path, dst=dst_path)

100%|███████████████████████████████████████| 1017/1017 [09:33<00:00,  1.77it/s]


In [63]:
## Copy TD subjects
for sub in tqdm(names_td):
    # Subjects without a folder in the source listing cannot be copied
    if sub not in anat_subs:
        continue
    src_path = os.path.join(anat_dir, sub)
    dst_path = os.path.join(new_dir, sub)
    # Leave an already-existing destination untouched
    if os.path.exists(dst_path):
        continue
    shutil.copytree(src=src_path, dst=dst_path)

100%|███████████████████████████████████████| 4021/4021 [18:55<00:00,  3.54it/s]


In [108]:
## How many ADHD subjects have two sessions

# One boolean per entry of names_adhd, in the same order
has_1st = []
has_2nd = []
has_both = []

for name_adhd in tqdm(names_adhd):
    try:
        # List the subject folder once and reuse the listing for every check
        sessions = os.listdir(os.path.join(anat_dir, name_adhd))
    except OSError:
        # Folder missing or unreadable: count the subject as having no sessions.
        # Only filesystem errors are caught, so Ctrl-C and real bugs still surface.
        sessions = []

    has1 = 'ses-baselineYear1Arm1' in sessions
    has2 = 'ses-2YearFollowUpYArm1' in sessions

    has_1st.append(has1)
    has_2nd.append(has2)
    has_both.append(has1 and has2)

100%|████████████████████████████████████| 1017/1017 [00:00<00:00, 13545.27it/s]


In [112]:
# Number of True entries in has_1st over the total number of entries
print(f'first session: {np.array(has_1st).sum()}/{len(has_1st)}')

959/1017


In [113]:
# Number of True entries in has_2nd over the total number of entries
print(f'second session: {np.array(has_2nd).sum()}/{len(has_2nd)}')

742/1017


In [114]:
# Number of True entries in has_both over the total number of entries
print(f'both sessions: {np.array(has_both).sum()}/{len(has_both)}')

706/1017


## Split into sessions T0 and T2

In [119]:
# Sorted names of the 'sub-' entries found in the working dataset directory
new_dir = './Data/ABCD-data-use/'
folders = sorted(entry for entry in os.listdir(new_dir) if entry.startswith('sub-'))
folders[:5]

['sub-NDARINV007W6H7B',
 'sub-NDARINV00LJVZK2',
 'sub-NDARINV00U4FTRU',
 'sub-NDARINV00X2TBWJ',
 'sub-NDARINV014RTM1V']

In [120]:
# Output roots for the per-session datasets: T0 receives the
# 'ses-baselineYear1Arm1' folders, T2 the 'ses-2YearFollowUpYArm1' folders
new_dir_t0 = './Data/ABCD-data-use-T0/'
new_dir_t2 = './Data/ABCD-data-use-T2/'

In [166]:
# shutil.copyfile does not create the destination directory, so make sure the
# T0 root exists first; this keeps the cell working on a top-to-bottom run.
safe_mkdir(new_dir_t0)
# Copy the dataset description from the source dataset into the T0 root
shutil.copyfile(os.path.join(anat_dir, 'dataset_description.json'),
                os.path.join(new_dir_t0, 'dataset_description.json'))

'./Data/ABCD-data-use-T0/dataset_description.json'

In [168]:
# shutil.copyfile does not create the destination directory, so make sure the
# T2 root exists first; this keeps the cell working on a top-to-bottom run.
safe_mkdir(new_dir_t2)
# Copy the dataset description from the source dataset into the T2 root
shutil.copyfile(os.path.join(anat_dir, 'dataset_description.json'),
                os.path.join(new_dir_t2, 'dataset_description.json'))

'./Data/ABCD-data-use-T2/dataset_description.json'

In [125]:
# Create the T0 and T2 output roots if they are not there yet
for session_root in (new_dir_t0, new_dir_t2):
    safe_mkdir(session_root)

In [153]:
# Each session folder name paired with the dataset root that receives it
session_targets = (
    ('ses-baselineYear1Arm1', new_dir_t0),
    ('ses-2YearFollowUpYArm1', new_dir_t2),
)

for sub in tqdm(folders):
    available = os.listdir(os.path.join(new_dir, sub))

    for session, target_root in session_targets:
        # Subject has no folder for this session
        if session not in available:
            continue
        session_dst = os.path.join(target_root, sub, session)
        # Destination already exists: leave it as it is
        if os.path.exists(session_dst):
            continue
        # The subject folder must exist before the session is copied into it
        safe_mkdir(os.path.join(target_root, sub))
        shutil.copytree(src=os.path.join(new_dir, sub, session), dst=session_dst)

100%|███████████████████████████████████████| 4972/4972 [20:23<00:00,  4.06it/s]


In [157]:
# Number of 'sub-' entries in the T0 root
sum(1 for entry in os.listdir(new_dir_t0) if entry.startswith('sub-'))

4836

In [158]:
# Number of 'sub-' entries in the T2 root
sum(1 for entry in os.listdir(new_dir_t2) if entry.startswith('sub-'))

4034

In [64]:
## Next step: submit the job script with `sbatch slurm-01-fmriprep-anats.sh`