In [None]:
import pandas as pd
import os
import shutil
import numpy as np
from tqdm import tqdm
import ants
import nibabel as nib

In [None]:
# Convert the tab-delimited image03 manifest into a comma-separated file.
read_file = pd.read_csv('/mmfs1/data/pijarj/NDAR_BoldAnat10/image03.txt', sep='\t')
# index=False keeps the positional index out of the written file.
read_file.to_csv('/mmfs1/data/pijarj/BC-ORG-Data/Data/image03.csv', index=False)

In [None]:
# Slice the full image03 manifest down to the rows of a single collection.
df = pd.read_csv('image03.csv')
study_name = 'Biomarkers of Developmental Trajectories and Treatment in ASD'
study_df = df[df['collection_title'] == study_name]
# index=False matches the other to_csv calls in this notebook; without it the
# row index is saved and comes back as an 'Unnamed: 0' column on the next read.
study_df.to_csv('../Data/DS2026.csv', index=False)
# Number of manifest rows that belong to the selected collection.
print(len(study_df))

In [None]:
# Keep only the manifest rows whose local file is a gzipped NIfTI image.
# NOTE(review): in the cells visible here, `df` is reassigned by a later
# read_csv before this filtered frame is consumed -- confirm it is still needed.
df = pd.read_csv('../Data/DS-2026.csv')
is_nii_gz = [path.endswith('.nii.gz') for path in df['local_paths']]
df = df.assign(is_nii_gz=is_nii_gz)
df = df[df['is_nii_gz']]

In [None]:
def safe_mkdir(path):
    """Create the directory `path` unless something already exists there.

    Parameters
    ----------
    path : str
        Directory to create. Missing parent directories are created as well.

    Returns
    -------
    None

    Notes
    -----
    If `path` already exists (as a directory or anything else) the call is a
    silent no-op, as before.
    """
    if os.path.exists(path):
        return
    # makedirs builds missing parents, and exist_ok=True tolerates the
    # directory being created by someone else between the check and this call.
    os.makedirs(path, exist_ok=True)

In [None]:
def write_json(data, filepath):
    """Serialize `data` as JSON into `filepath`, replacing any existing content.

    Parameters
    ----------
    data : object accepted by json.dump
    filepath : str
        Destination file, opened in text write mode.
    """
    from json import dump

    with open(filepath, mode='w') as handle:
        dump(data, fp=handle)

In [None]:
# Load the study manifest and list its distinct subject keys; the helper
# defined in this cell then checks each subject for both scan types.
study_df = pd.read_csv('DS-2026.csv')
study_subjects = np.unique(study_df['subjectkey'].to_numpy())
nsubjects = study_subjects.size
print(nsubjects)
def check_has_anat_and_epi(sub, df=None):
    """Tell whether subject `sub` has both an fMRI and a T1 structural row.

    Parameters
    ----------
    sub : object
        Value compared against the 'subjectkey' column.
    df : pandas.DataFrame, optional
        Manifest with 'subjectkey' and 'scan_type' columns. When omitted, the
        notebook-level `study_df` is used, as in the original one-argument form.

    Returns
    -------
    bool
        True when at least one row of the subject has scan_type 'fMRI' and at
        least one has scan_type 'MR structural (T1)'.
    """
    if df is None:
        # Fall back to the notebook global so existing one-argument calls work.
        df = study_df
    scan_types = df.loc[df['subjectkey'].values == sub, 'scan_type'].values
    has_fmri = (scan_types == 'fMRI').any()
    has_anat = (scan_types == 'MR structural (T1)').any()
    return bool(has_fmri and has_anat)

In [None]:
# Parent directory under which the 'ds-2026' BIDS tree is created.
bids_root = '/mmfs1/data/pijarj/'
# Directory that source image paths are joined onto before reading.
ndar_root = '/mmfs1/data/pijarj/NDAR_BoldAnat10/'

In [None]:
# One boolean per entry of study_subjects. dtype=bool is forced so that an
# empty subject list still yields a boolean mask: a bare np.array([]) is
# float64 and cannot be used to index study_subjects.
has_anat_and_epi = np.array([check_has_anat_and_epi(s) for s in study_subjects], dtype=bool)

In [None]:
# Keep only the subjects flagged by the mask and report how many remain.
use_subjects = study_subjects[has_anat_and_epi]
nsubjects = use_subjects.shape[0]
print(nsubjects)

In [None]:
# Lay out the BIDS directory skeleton: ds-2026/sub-XXX/{func,anat} for each
# of the nsubjects selected subjects, numbered from 001.
dataset_dir = os.path.join(bids_root, 'ds-2026')
safe_mkdir(dataset_dir)
for s in range(1, nsubjects + 1):
    subject_dir = os.path.join(dataset_dir, f'sub-{s:03d}')
    # Parent first, then its modality folders.
    safe_mkdir(subject_dir)
    for modality in ('func', 'anat'):
        safe_mkdir(os.path.join(subject_dir, modality))

In [None]:
# Build a 'local_paths' column that points each manifest row at its file under
# the local image03 directory, then save the augmented manifest.
df = pd.read_csv('DS2026.csv')
n = len(df)
# Absolute root in the user's home; an earlier revision used './image03/'.
root = os.path.expanduser('~/NDAR_BoldAnat10/image03/')
local_paths = []
for s3_path in tqdm(df['image_file'].values):
    # Drop the first four '/'-separated pieces of the stored path
    # (presumably the scheme, bucket and submission prefix -- TODO confirm).
    relative = '/'.join(s3_path.split('/')[4:])
    local_path = os.path.join(root, relative)
    # Stop immediately if an expected file is not on disk.
    assert os.path.exists(local_path)
    local_paths.append(local_path)
df['local_paths'] = local_paths
df.to_csv('DS-2026.csv', index=False)

In [None]:
# Resolve source and destination paths for the first selected subject only
# (nothing is read or written here); the loop in the next cell repeats these
# steps for every subject.
s = 0
sub = use_subjects[s]
sub_df = study_df.iloc[study_df['subjectkey'].values==sub]
fmri_idx = sub_df['scan_type'].values=='fMRI'
anat_idx = sub_df['scan_type'].values=='MR structural (T1)'

# First fMRI row and first T1 row of this subject.
epi_fn = sub_df.iloc[fmri_idx]['local_paths'].values[0]
anat_fn = sub_df.iloc[anat_idx]['local_paths'].values[0]

# Join the stored path unchanged, the same way the copy loop does. The former
# [2::] slice dropped the first two characters, which only suits paths that
# start with './' and mangles any other value of 'local_paths'.
epi_path = os.path.join(ndar_root,epi_fn)
anat_path = os.path.join(ndar_root,anat_fn)

# Destinations use 1-based, zero-padded subject labels.
epi_dest = os.path.join(bids_root,f'ds-2026',f'sub-{s+1:03d}','func',f'sub-{s+1:03d}_task-rest_bold.nii.gz')
anat_dest = os.path.join(bids_root,f'ds-2026',f'sub-{s+1:03d}','anat',f'sub-{s+1:03d}_T1w.nii.gz')

In [None]:
# For every selected subject: read its first fMRI and first T1 image, write
# both into the BIDS tree, emit a JSON sidecar for the BOLD file, and rewrite
# the BOLD file with explicit mm/sec units in its header.
for s in tqdm(range(nsubjects)):
    sub = use_subjects[s]
    sub_df = study_df.iloc[study_df['subjectkey'].values == sub]
    fmri_idx = sub_df['scan_type'].values == 'fMRI'
    anat_idx = sub_df['scan_type'].values == 'MR structural (T1)'

    # Only the first row of each scan type is used.
    epi_fn = sub_df.iloc[fmri_idx]['local_paths'].values[0]
    anat_fn = sub_df.iloc[anat_idx]['local_paths'].values[0]

    # The stored path is joined unchanged (no leading characters stripped).
    epi_path = os.path.join(ndar_root, epi_fn)
    anat_path = os.path.join(ndar_root, anat_fn)

    # Subject folders are numbered from 001, hence s+1.
    sub_dir = os.path.join(bids_root, 'ds-2026', f'sub-{s+1:03d}')
    epi_dest = os.path.join(sub_dir, 'func', f'sub-{s+1:03d}_task-rest_bold.nii.gz')
    anat_dest = os.path.join(sub_dir, 'anat', f'sub-{s+1:03d}_T1w.nii.gz')

    t1 = ants.image_read(anat_path)
    bold = ants.image_read(epi_path)

    t1.to_filename(anat_dest)
    bold.to_filename(epi_dest)

    # Sidecar next to the BOLD file. RepetitionTime is the last spacing entry
    # -- assumes the image is 4D with time as the last axis; TODO confirm.
    bold_json = {"RepetitionTime": bold.spacing[-1], "TaskName": 'rest'}
    write_json(bold_json, epi_dest.replace('.nii.gz', '.json'))

    # Reload the file just written, set the header units, and overwrite it.
    # NOTE(review): the affine argument is None, so orientation has to come
    # from the copied header -- confirm the rewritten file keeps it.
    im = nib.load(epi_dest)
    header = im.header.copy()
    header.set_xyzt_units(xyz='mm', t='sec')
    nib.nifti1.Nifti1Image(im.get_fdata(), None, header=header).to_filename(epi_dest)
    # Verify the units on disk after the rewrite.
    assert nib.load(epi_dest).header.get_xyzt_units()==('mm', 'sec'),'timing missing from header'

In [None]:
# Write dataset_description.json at the root of the BIDS directory.
import json
# NOTE(review): "20.2.0" does not look like a BIDS specification version, and
# RepetitionTime / SliceTiming / TaskName are normally per-run sidecar fields
# ("taskrest" also differs from the 'rest' task name written in the BOLD
# sidecars) -- confirm the intended values; they are left unchanged here.
data = {
    "Name" : study_df["collection_title"].values[0] ,
    "RepetitionTime": 2.0,
    "SliceTiming" : 2.0 ,
    "TaskName" : "taskrest" ,
    "BIDSVersion" : "20.2.0"}
# Serialized copy is only for on-screen inspection.
json_string = json.dumps(data)
print(json_string)
with open(os.path.join(bids_root,f'ds-2026','dataset_description.json'), 'w') as outfile:
    # Dump the dict itself. Dumping json_string would serialize already
    # serialized text a second time, leaving a quoted string in the file
    # instead of a JSON object.
    json.dump(data, outfile)