# Extract the subject ids / Get the list of scans

#### To generate the anat_mri.txt file (required by the cells below), run the following commands in a terminal.

cd ~/neurospin/acquisition/database/Prisma_fit 

find -maxdepth 3 -name '*mprage*' -type d > /home/is153802/code/formatting/anat_MRI/anat_mri.txt

chmod 777 /home/is153802/code/formatting/anat_MRI/anat_mri.txt

In [41]:
# This notebook collects the subject ids from the raw MEG files of the LPP experiment
# and, for each subject, looks up their anatomical MRI scan and copies it into a folder.
# Later on, these DICOM scans are converted to the nii.gz format using dcm2niix.

import pandas as pd
import numpy as np
from pathlib import Path
import subprocess


# Which experiment to process: 'auditory' or 'visual'
modality = 'auditory' # 'visual'

# Location of this formatting code (anat_mri.txt lives there) and of the copied scans
anat_code_path = '/home/is153802/code/formatting/anat_MRI'
anat_base_path = '/home/is153802/workspace_LPP/data/MEG/LPP/anat'

# Acquisition database: the paths listed in anat_mri.txt are relative to this folder
scan_path = Path('/home/is153802/neurospin/acquisition/database/Prisma_fit')

# Folder of the raw MEG recordings for the chosen modality
raw = Path(f'/home/is153802/data/raw_{modality}')

# Destination of the DICOM copies and of the converted (BIDS) files
bids_folder = Path(f"/home/is153802/workspace_LPP/workspace-LPP/data/MEG/LPP/LPP_MEG_{modality}")
output_folder = Path(f"{anat_base_path}/{modality}")

# Full path, as a string, of every entry of the raw folder
subj = list(map(str, raw.iterdir()))

# One scan directory per line of anat_mri.txt, with the newline removed
with open(f'{anat_code_path}/anat_mri.txt', 'r') as fh:
    list_scans = [line.replace('\n','') for line in fh]

# Characters 11 to 18 of './<date>/<id>-...' hold the 8-character subject id
list_scans_subjects = [scan_dir[11:19] for scan_dir in list_scans]

list_scans_subjects

['me220033',
 'sa220376',
 'cl220375',
 'sn220387',
 'na220577',
 'co170274',
 'jm190617',
 'ld190491',
 'vb190684',
 'hw220243',
 'ep220179',
 'tm180095',
 'mf210328',
 'mm210150',
 'vb170091',
 'el190724',
 'ml200349',
 'tz200357',
 'af150011',
 'ms180425',
 'cb200314',
 'em090307',
 'jm100042',
 'ml220201',
 'yp220190',
 'pe220153',
 'zp130008',
 'jb210602',
 'ap120157',
 'sm210219',
 'fd110104',
 'md210226',
 'cd120206',
 'ag140026',
 'jl190711',
 'bl210434',
 'id220259',
 'rh220419',
 'mt170249',
 'um230121',
 'pc230084',
 'al180103',
 'fm180074',
 'ml180318',
 'cl230072',
 'jl230106',
 'wd230073',
 'ld190491',
 'at160380',
 'lj150477',
 'df150397',
 'nk160207',
 'ar160084',
 'cg130228',
 'am120405',
 'et200081',
 'et200081',
 'et120081',
 'et120081',
 'fr200465',
 'gp190581',
 'cc130066',
 'ob200208',
 'sc120530',
 'Test-FM-',
 'bm190077',
 'cl190429',
 'ms210054',
 'ec110149',
 'tg200282',
 'mm190138',
 'rt150347',
 'ag190008',
 'at190242',
 'rt160272',
 'kb190696',
 'ab200039',

In [4]:
# Cleaning the list of subjects - to keep only LPP subjects
# Every entry of `subj` is the full path of a raw-data folder, and only its
# last component names the subject. Taking Path(...).name does not depend on
# a "petit_prince/" marker being present in the path: str.rfind returns -1
# when the marker is missing, which would silently shift the slice offset.
subjects = [Path(sub).name for sub in subj]

# Hardcoded fixes for previous subjects with incorrect names, etc..
# This should not be a problem in the future, thus stay unchanged
# (kept for reference, intentionally not executed):
# subjects.remove('rt220104_rt220104')
# subjects.remove('ml100438_ml100438')
# subjects.remove('empty_room')
# subjects.append('ml_100438')
# subjects.append('rt_220104')
# TODO figure out the ab73???

# Drop the underscore and keep the last 8 characters, i.e. the id format
# used to match the scan folders ('xx_NNNNNN' -> 'xxNNNNNN')
subjects = [subject.replace('_', '')[-8:] for subject in subjects]

len(subjects)

54

In [5]:
# Keep only the scans whose subject id is one of the LPP subjects
list_scans_final = [
    scan_dir
    for scan_dir, scan_subject in zip(list_scans, list_scans_subjects)
    if scan_subject in subjects
]
list_scans_final

['./20210909/mf210328-7256_001/000015_mprage-sag-T1-160sl-iPAT2',
 './20220112/ap120157-7372_001/000002_mprage-sag-T1-160sl',
 './20190722/cl190429-6553_001/000002_mprage-sag-T1-160sl',
 './20221014/lp090137-7789_001/000002_mprage-sag-T1-160sl',
 './20200915/lg170436-6897_001/000013_mprage-sag-T1-160sl-iPAT2',
 './20220518/rt220104-7590_001/000014_mprage-sag-T1-160sl',
 './20190702/pl170230-6522_001/000014_mprage-sag-T1-160sl',
 './20221209/df130078-7884_001/000017_mprage-sag-T1-160sl-iPAT2',
 './20181003/mb180076-6193_001/000018_mprage-sag-T1-160sl-iPat2',
 './20220428/rt220104-7542_001/000021_mprage-sag-T1-160sl-iPAT2',
 './20181009/kc160388-6204_001/000018_mprage-sag-T1-160sl-iPat2',
 './20180412/mb180076-5995_001/000002_mprage-sag-T1-160sl',
 './20220309/rt220104-7463_001/000015_mprage-sag-T1-160sl-iPAT2',
 './20191217/js180232-6704_001/000022_mprage-sag-T1-160sl',
 './20220913/cl220500-7727_001/000016_mprage-sag-T1-160sl',
 './20230106/al220758-7913_001/000002_mprage-sag-T1-160sl'

In [6]:
# For every participant, record the date and the series number of their latest scan

# Ascending sort puts the most recent acquisition last, so when a participant
# has several scans the later entries overwrite the earlier ones below
list_scans_final.sort()

# Scan paths look like './<date>/<id>-<exam>_<session>/<series>_<protocol>'
pat = np.array([pati[11:19] for pati in list_scans_final]) # list of participants
dates = np.array([pati[2:10] for pati in list_scans_final]) # list of dates of anat
# Series number = leading digits of the last path component. Parsing it from
# that component (rather than from fixed character offsets) keeps every digit
# and does not depend on the length of the exam folder name.
scan_nb = np.array([pati.rsplit('/', 1)[-1].split('_', 1)[0] for pati in list_scans_final]) # list of series numbers
scan_nb_final = [str(int(nb)) for nb in scan_nb] # without the zero padding

dict_pat_dat = dict(zip(pat, dates))

dict_pat_scan = dict(zip(pat, scan_nb_final))


In [10]:
# For every subject, keep the scan whose path sorts last.
# Paths start with the acquisition date, so that is the latest scan.
scan_to_move = []

for sub in subjects:
    candidates = [scan for scan in list_scans_final if scan[11:19] == sub]
    # Subjects without any scan are simply left out
    if candidates:
        scan_to_move.append(max(candidates))

# Drop the './' prefix written by `find`
scan_to_move = [scan.replace('./','') for scan in scan_to_move]
len(scan_to_move)

47

In [11]:
# Copying the scans to the right folder
# Each scan directory is copied to <output_folder>/<subject id>; subjects that
# already have a folder there are skipped.
for scan in scan_to_move:
    scan_path_specific = scan_path / scan
    # The './' prefix was removed earlier, so the id sits at characters 9 to 16
    sub = scan[9:17]
    destination = output_folder / sub
    if destination.exists():
        print(f" {destination}  exists")
        continue

    output_folder.mkdir(parents=True, exist_ok=True)

    # Pass the arguments as a list: splitting a command string on whitespace
    # breaks as soon as a path contains a space
    result = subprocess.run(
        ["cp", "-r", str(scan_path_specific), str(destination)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # Report a failed copy instead of ignoring the exit status
    if result.returncode != 0:
        print(f"Copy failed for {sub}: {result.stderr.strip()}")

In [18]:
# Dict nip to sn:
# Maps each subject id (written 'xx_NNNNNN') to the subject number used in the
# 'sub-<number>' folder names, separately for each modality.

if modality == 'visual':
    dict_nip_to_sn = {
        'cb_666666': '1',
        'rb_666666': '2',
        'dl_230038': '3',
        'yb_220174': '4',
        'mn_230056': '5',
        'eg_220435': '6',
        'gb_220537': '7',
        'am_230061': '8',
        'jd_220636': '9',
        'jm_100042': '10',
        'es_220094': '11',
        'aa_230065': '12',
        'll_180197': '13',
        'am_220107': '14',
        'ci_210203': '15',
        'pl_230089': '16',
        'jm_230095': '17',
        'aj_220730': '18',
        'fv_230122': '19',
        'vr_230124': '20',
        'gd_230114': '21',
        'um_230121': '22',
        'tv_230127': '23',
        'ym_220242': '24',
        'ac_230112': '25',
        'po_230175': '26',
        'jr_230176': '27',
        'ml_110339': '28',
        'li_230200': '29',
        'nv_230178': '30',
        'jf_230204': '31',
        'cd_230186': '32',
        're_230199': '33',
        'nc_230202': '34',
        'mm_230182': '35',
        'sm_230170': '36',
        'fd_110104': '37',
    }
elif modality == 'auditory':
    dict_nip_to_sn = {
        'ae_140329': '2', 'cc_150418': '3', 'cl_220500': '5',
        'fr_190151': '7', 'hg_220389': '8', 'js_180232': '9',
        'kc_160388': '10', 'kp_210315': '11', 'lg_170436': '12',
        'lq_180242': '13', 'mb_180076': '14', 'mf_180020': '15',
        'ml_100438': '16', 'ml_180010': '17', 'ml_220421': '18',
        'pl_170230': '19', 'rt_220104': '20', 'sa_170217': '21',
        'sf_180213': '22', 'eg_220435': '23', 'cl_190429': '24',
        'ap_220150': '25', 'df_130078': '26', 'lp_090137': '4',
        'ya_170284': '6', 'vc_200442': '27', 'cj_090334': '1',
        'se_210401': '28', 'bk_220247': '29', 'sb_220619': '30',
        'ya_220605': '31', 'nl_220497': '32', 'ay_220681': '33',
        'td_220613': '34', 'jp_220691': '35', 'ap_120157': '36',
        'md_220654': '37', 'jv_220664': '38', 'tc_200507': '39',
        'ag_220624': '40', 'rh_220668': '41', 'cl_110710': '42',
        'am_220666': '43', 'ml_220557': '44', 'dt_220722': '45',
        'ia_220711': '46', 'lg_220612': '47', 'jm_220720': '48',
        'cl_220706': '49', 'mf_210328': '50', 'ad_220723': '51',
        'ar_220740': '52', 'al_220758': '53', 'rz_220739': '54',
        'pj_150414': '55',
        'gt_150298': '56',
        'ad_140107': '57',
        'gl_150316': '58',
    }
else:
    # Fail here rather than later: without this branch the dict would stay
    # undefined, or keep the value left by an earlier run of this cell
    raise ValueError(
        f"Unknown modality {modality!r}: expected 'visual' or 'auditory'"
    )

In [49]:
# Transforming each folder of DICOM scans to the niix format
# Every entry of output_folder is converted with dcm2niix into
# <bids_folder>/sub-<number>/ses-01/anat/.

for sub in output_folder.iterdir():
    # The folder name is the 8-character subject id; the lookup table uses 'xx_NNNNNN'
    identifiable = str(sub)[-8:]
    identifiable_ = identifiable[:2] + '_' + identifiable[2:]
    # Despite its name, `nip` holds the subject number read from dict_nip_to_sn
    nip = dict_nip_to_sn[identifiable_]

    dicom_folder_sub = output_folder / identifiable  # where the DICOM files of a sub are stored
    output_bids = bids_folder / f'sub-{nip}/ses-01/anat/'  # where the niix file should be stored (BIDS)

    # Create the whole sub-<number>/ses-01/anat chain; a plain `mkdir` fails
    # when a parent folder is missing, leaving dcm2niix without an output folder
    output_bids.mkdir(parents=True, exist_ok=True)

    # Dcm2niix (arguments as a list, so paths containing spaces are kept intact)
    result = subprocess.run(
        ["dcm2niix", "-o", str(output_bids), "-z", "y", "-b", "y", str(dicom_folder_sub)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # Report a failed conversion instead of ignoring the exit status
    if result.returncode != 0:
        print(f"dcm2niix failed for {identifiable}: {result.stderr.strip()}")

In [50]:
# List the subject id (last 8 characters of the path) of every entry of output_folder
for entry in output_folder.iterdir():
    subject_id = str(entry)[-8:]
    print(subject_id)

ya220605
mf210328
cl190429
hg220389
ml220421
lp090137
ay220681
ap120157
ar220740
eg220435
ml100438
rh220668
cj090334
ae140329
pl170230
ya170284
tc200507
rz220739
lq180242
ap220150
df130078
kc160388
cl220500
mb180076
fr190151
kp210315
ml180010
cc150418
jp220691
td220613
rt220104
se210401
js180232
nl220497
md220654
lg220612
jv220664
vc200442
lg170436
al220758
ad220723
mf180020
cl220706
dt220722
bk220247
sb220619
ml220557


Open the participants_to_import.tsv file to populate it, then run neurospin_to_bids on it.

In [None]:
# NOTE(review): incomplete command (no file argument). Presumably meant to
# create the participants_to_import.tsv file read by the next cell - TODO confirm
touch

In [52]:
# Read the existing table; only its column names are reused below
df__ = pd.read_csv(f'{anat_code_path}/{modality}/participants_to_import.tsv',delimiter='\t')
# One empty placeholder row carrying the same columns as the file, whatever
# their number (no hard-coded column count)
df = pd.DataFrame(index=[0], columns=df__.columns)
df

Unnamed: 0,participant_id,NIP,infos_participant,session_label,acq_date,acq_label,location,to_import
0,,,,,,,,


In [53]:
# Build one column of 8 values per subject that has an anatomical scan.
# Subjects without a scan are reported and left out.
subject_columns = []
for sub in subjects:
    # Exact key membership in both dicts: a substring test on the keys could
    # accept an id that is not itself a key and then fail on the lookup below
    if sub in dict_pat_dat and sub in dict_pat_scan:
        sub_ = sub[:2] + '_' + sub[2:]
        # 'YYYYMMDD' -> 'YYYY-MM-DD'
        acq = dict_pat_dat[sub]
        date = acq[:4] + '-' + acq[4:6] + '-' + acq[6:]
        df_ = pd.DataFrame([f'sub-{dict_nip_to_sn[sub_]}', sub, {}, '01', date, None, 'prisma', f"(('{dict_pat_scan[sub]}','anat','T1w'))"])
        subject_columns.append(df_)
    else:
        print(f'No anat for {sub} yet')

# Concatenate once, side by side; pd.concat rejects an empty list, so fall
# back to an empty frame when no subject has a scan
dff = pd.concat(subject_columns, axis=1) if subject_columns else pd.DataFrame()

No anat for jm220720 yet
No anat for sa170217 yet
No anat for ag220624 yet
No anat for ia220711 yet
No anat for cl110710 yet
No anat for sf180213 yet
No anat for am220666 yet


In [54]:
# Turn the per-subject columns into rows and give them the template's column names
dff_t = dff.T
dff_t.columns = df.columns
# Stack the rows under the template's placeholder row, then drop that first row
stacked = pd.concat([df, dff_t])
final = stacked.iloc[1:]
final

Unnamed: 0,participant_id,NIP,infos_participant,session_label,acq_date,acq_label,location,to_import
0,sub-27,vc200442,{},1,2021-06-10,,prisma,"(('20','anat','T1w'))"
0,sub-9,js180232,{},1,2019-12-17,,prisma,"(('22','anat','T1w'))"
0,sub-49,cl220706,{},1,2022-12-07,,prisma,"(('2','anat','T1w'))"
0,sub-35,jp220691,{},1,2022-11-29,,prisma,"(('20','anat','T1w'))"
0,sub-28,se210401,{},1,2022-10-25,,prisma,"(('2','anat','T1w'))"
0,sub-54,rz220739,{},1,2023-01-10,,prisma,"(('2','anat','T1w'))"
0,sub-2,ae140329,{},1,2020-09-18,,prisma,"(('25','anat','T1w'))"
0,sub-26,df130078,{},1,2022-12-09,,prisma,"(('17','anat','T1w'))"
0,sub-3,cc150418,{},1,2015-12-07,,prisma,"(('2','anat','T1w'))"
0,sub-11,kp210315,{},1,2022-08-01,,prisma,"(('2','anat','T1w'))"


In [55]:
# Write the table as tab-separated values, without the index column,
# relative to the current working directory
tsv_path = f'./{modality}/participants_to_import.tsv'
final.to_csv(tsv_path, sep='\t', index=False)

In [57]:
# Show the current working directory: the relative ./{modality}/ paths used
# when writing and sending participants_to_import.tsv resolve against it
pwd

'/mnt/localdrive/workspace-LPP/code/neurospin-petit-prince/formatting/anat_MRI'

In [58]:
# Copy the csv file to the server:
# The file sent is ./{modality}/participants_to_import.tsv, resolved against
# the current working directory; it is stored under mri/exp_info on the remote host
!scp ./{modality}/participants_to_import.tsv cb271805@132.166.141.247:/home/cb271805/mri/exp_info/participants_to_import.tsv

participants_to_import.tsv                    100% 3015    97.0KB/s   00:00    


In [None]:
# To check:
# Presumably you can ssh to nautilus and run the following commands
# ssh cb271805@132.166.141.247
# cd /home/cb271805/mri/
# rm -r rawdata # Make sure that there's no previous folder
# neurospin_to_bids

In [None]:
# Then, copy the generated folder back to the CP PC
# scp -r  cb271805@132.166.141.247:/home/cb271805/mri/rawdata ~/data/MRI/{modality}

In [None]:
# Remove the dataset-level files from the copied {modality} folder
cd ~/data/MRI/{modality}
rm participants.tsv dataset_description.json
cd ..
# Sync that {modality}/ folder into LPP_MEG_{modality}/ (the source must use
# the {modality} placeholder, not a folder literally named "modality")
rsync -auv --stats --progress {modality}/ LPP_MEG_{modality}/