In [1]:
# This notebook collects all the subject ids from the raw MEG files of the LPP experiment
# and, for each subject, looks up their anatomical MRI scan and copies it into a folder.
# Later on, these DICOM scans are converted to the nii.gz format using dcm2niix.

import pandas as pd
import numpy as np
from pathlib import Path
import subprocess



# Hard-coded locations used throughout the notebook.
# Alternative location of the raw data, kept commented out for reference:
#raw = Path('/home/is153802/neurospin/acquisition/neuromag/data/petit_prince')
# Folder whose entries are listed to obtain the subject ids
raw = Path('/home/is153802/data/raw_old')
# Root onto which the relative scan paths are joined when copying the scans
scan_path = Path('/home/is153802/neurospin/acquisition/database/Prisma_fit')
# Destination of the copied DICOM folders (one sub-folder per subject id)
output_folder = Path("/home/is153802/workspace_LPP/data/MEG/LPP/anat")
# Root under which the dcm2niix output is written (sub-<label>/ses-01/anat)
bids_folder = Path("/home/is153802/workspace_LPP/data/MEG/LPP/BIDS")

# Every entry of the raw MEG folder, stored as a plain string path
subj = list(map(str, raw.iterdir()))

# The listing read below has to be generated beforehand with:
# cd /neurospin/acquisition/database/Prisma_fit
# find -maxdepth 3 -name '*mprage*' -type d > /home/is153802/code/source-reconstruction/anat_mri.txt
# chmod 777 /home/is153802/code/source-reconstruction/anat_mri.txt
with open('/home/is153802/code/source-reconstruction/anat_mri.txt', 'r') as fh:
    # One scan folder per line; the newline characters are removed
    list_scans = [line.replace('\n', '') for line in fh]

# Characters 11 to 18 of each entry are taken as the 8-character subject id
list_scans_subjects = [entry[11:19] for entry in list_scans]

list_scans_subjects

['me220033',
 'sa220376',
 'cl220375',
 'sn220387',
 'co170274',
 'jm190617',
 'ld190491',
 'vb190684',
 'hw220243',
 'ep220179',
 'tm180095',
 'mf210328',
 'mm210150',
 'vb170091',
 'el190724',
 'ml200349',
 'tz200357',
 'af150011',
 'ms180425',
 'cb200314',
 'em090307',
 'jm100042',
 'ml220201',
 'yp220190',
 'pe220153',
 'zp130008',
 'jb210602',
 'ap120157',
 'sm210219',
 'fd110104',
 'md210226',
 'cd120206',
 'ag140026',
 'jl190711',
 'bl210434',
 'id220259',
 'rh220419',
 'mt170249',
 'al180103',
 'fm180074',
 'ml180318',
 'ld190491',
 'at160380',
 'lj150477',
 'df150397',
 'nk160207',
 'ar160084',
 'cg130228',
 'am120405',
 'et200081',
 'et200081',
 'et120081',
 'et120081',
 'gp190581',
 'cc130066',
 'ob200208',
 'sc120530',
 'Test-FM-',
 'bm190077',
 'cl190429',
 'ms210054',
 'ec110149',
 'tg200282',
 'mm190138',
 'rt150347',
 'ag190008',
 'at190242',
 'rt160272',
 'kb190696',
 'ab200039',
 'lv210255',
 'eo190576',
 'ec190611',
 'mk140264',
 'ys220256',
 'ml180195',
 'id200058',

In [2]:
# Cleaning the list of subjects - to keep only LPP subjects
# Keep only the last path component of every raw entry. The former
# `rfind("petit_prince/")` lookup returns -1 when the path does not contain that
# folder name (which is the case for the current `raw` location), so the slice
# started at an arbitrary offset inside the path.
subjects = [Path(sub).name for sub in subj]

# Hardcoded fixes for previous subjects with incorrect names, etc..
# This should not be a problem in the future, thus stay unchanged
# subjects.remove('rt220104_rt220104')
# subjects.remove('ml100438_ml100438')
# subjects.remove('empty_room')
# subjects.append('ml_100438')
# subjects.append('rt_220104')
# TODO figure out the ab73???

# Remove the underscores and keep the last 8 characters as the subject id
subjects = [subject.replace('_','')[-8:] for subject in subjects]

len(subjects)

28

In [3]:
# Keep only the scans whose subject id belongs to the LPP subjects
list_scans_final = [
    scan
    for scan, scan_subject in zip(list_scans, list_scans_subjects)
    if scan_subject in subjects
]
list_scans_final

['./20190722/cl190429-6553_001/000002_mprage-sag-T1-160sl',
 './20200915/lg170436-6897_001/000013_mprage-sag-T1-160sl-iPAT2',
 './20220518/rt220104-7590_001/000014_mprage-sag-T1-160sl',
 './20190702/pl170230-6522_001/000014_mprage-sag-T1-160sl',
 './20181003/mb180076-6193_001/000018_mprage-sag-T1-160sl-iPat2',
 './20220428/rt220104-7542_001/000021_mprage-sag-T1-160sl-iPAT2',
 './20181009/kc160388-6204_001/000018_mprage-sag-T1-160sl-iPat2',
 './20180412/mb180076-5995_001/000002_mprage-sag-T1-160sl',
 './20220309/rt220104-7463_001/000015_mprage-sag-T1-160sl-iPAT2',
 './20191217/js180232-6704_001/000022_mprage-sag-T1-160sl',
 './20220913/cl220500-7727_001/000016_mprage-sag-T1-160sl',
 './20190522/fr190151-6473_001/000002_mprage-sag-T1-160sl',
 './20171219/lg170436-5870_001/000002_t1-mprage-sag-1mm-iso',
 './20191016/lq180242-6623_001/000002_mprage-sag-T1-160sl',
 './20190506/fr190151-6450_001/000002_mprage-sag-T1-160sl',
 './20200918/ae140329-6903_001/000025_mprage-sag-T1-160sl-iPat2',
 '

In [4]:
# Map every participant to the date and the series number of one of their anat scans

# Sort the scans in place: for a participant appearing several times, the entry
# that comes last in sorted order is the one kept by the dictionaries below
list_scans_final.sort()

pat = np.array([entry[11:19] for entry in list_scans_final])  # participant ids
dates = np.array([entry[2:10] for entry in list_scans_final])  # dates of the anat scans
scan_nb = np.array([entry[32:35] for entry in list_scans_final])  # 3-digit scan numbers
# Drop the leading zeros of the scan number (e.g. '002' -> '2')
scan_nb_final = [str(int(number)) for number in scan_nb]

dict_pat_dat = dict(zip(pat, dates))
dict_pat_scan = dict(zip(pat, scan_nb_final))


In [5]:
# For each subject, select among their scans the one that sorts last
# (entries begin with the date, so this is the most recent scan)
scan_to_move = []

for sub in subjects:
    candidates = [scan for scan in list_scans_final if scan[11:19] == sub]
    # Subjects without any scan are skipped
    if not candidates:
        continue
    scan_to_move.append(max(candidates))

# Remove the './' found in the listing entries
scan_to_move = [scan.replace('./', '') for scan in scan_to_move]
scan_to_move

['20210610/vc200442-7162_001/000020_mprage-sag-T1-160sl-iPAT2',
 '20191217/js180232-6704_001/000022_mprage-sag-T1-160sl',
 '20200918/ae140329-6903_001/000025_mprage-sag-T1-160sl-iPat2',
 '20151207/cc150418-4939_001/000002_mprage-sag-T1-160sl',
 '20220801/kp210315-7695_001/000002_mprage-sag-T1-160sl',
 '20220719/ml220421-7681_001/000014_mprage-sag-T1-160sl',
 '20180116/ml180010-5890_001/000004_mprage-sag-T1-160sl',
 '20220927/pl170230-7754_001/000002_mprage-sag-T1-160sl-iPAT2',
 '20220621/ml100438-7646_001/000014_mprage-sag-T1-160sl',
 '20210609/lp090137-7160_001/000002_mprage-sag-T1-0.9mm-176sl-PF7-8',
 '20181009/kc160388-6204_001/000018_mprage-sag-T1-160sl-iPat2',
 '20220518/rt220104-7590_001/000014_mprage-sag-T1-160sl',
 '20220328/ap220150-7489_001/000002_mprage-sag-T1-160sl',
 '20180130/mf180020-5904_001/000002_mprage-sag-T1-160sl',
 '20190722/cl190429-6553_001/000002_mprage-sag-T1-160sl',
 '20200915/lg170436-6897_001/000013_mprage-sag-T1-160sl-iPAT2',
 '20200130/fr190151-6735_001/0

In [None]:
# Copying the scans to the right folder
for scan in scan_to_move:
    scan_path_specific = scan_path / scan
    sub = scan[9:17]  # 8 characters that follow the 'YYYYMMDD/' prefix
    destination = output_folder / sub
    if destination.exists():
        print(f" {destination}  exists")
        continue

    # Create the destination root with pathlib instead of spawning `mkdir -p`
    output_folder.mkdir(parents=True, exist_ok=True)

    # The arguments are passed as a list so that a path containing whitespace is
    # not split into several arguments, and stderr is captured so that a failed
    # copy is reported instead of being silently ignored.
    result = subprocess.run(
        ["cp", "-r", str(scan_path_specific), str(destination)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        reason = result.stderr.decode(errors="replace").strip()
        # NOTE: a partially copied folder may remain and would be skipped on the
        # next run because it exists; remove it before retrying.
        print(f" Copy of {scan_path_specific} failed (exit code {result.returncode}): {reason}")

 /home/is153802/workspace_LPP/data/MEG/LPP/anat/vc200442  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/js180232  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/ae140329  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/cc150418  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/kp210315  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/ml220421  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/ml180010  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/pl170230  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/ml100438  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/lp090137  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/kc160388  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/rt220104  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/ap220150  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/mf180020  exists
 /home/is153802/workspace_LPP/data/MEG/LPP/anat/cl190429  exists
 /home/is153802/workspace

In [6]:
# Display the scans selected for copying (one per subject)
scan_to_move


['20210610/vc200442-7162_001/000020_mprage-sag-T1-160sl-iPAT2',
 '20191217/js180232-6704_001/000022_mprage-sag-T1-160sl',
 '20200918/ae140329-6903_001/000025_mprage-sag-T1-160sl-iPat2',
 '20151207/cc150418-4939_001/000002_mprage-sag-T1-160sl',
 '20220801/kp210315-7695_001/000002_mprage-sag-T1-160sl',
 '20220719/ml220421-7681_001/000014_mprage-sag-T1-160sl',
 '20180116/ml180010-5890_001/000004_mprage-sag-T1-160sl',
 '20220927/pl170230-7754_001/000002_mprage-sag-T1-160sl-iPAT2',
 '20220621/ml100438-7646_001/000014_mprage-sag-T1-160sl',
 '20210609/lp090137-7160_001/000002_mprage-sag-T1-0.9mm-176sl-PF7-8',
 '20181009/kc160388-6204_001/000018_mprage-sag-T1-160sl-iPat2',
 '20220518/rt220104-7590_001/000014_mprage-sag-T1-160sl',
 '20220328/ap220150-7489_001/000002_mprage-sag-T1-160sl',
 '20180130/mf180020-5904_001/000002_mprage-sag-T1-160sl',
 '20190722/cl190429-6553_001/000002_mprage-sag-T1-160sl',
 '20200915/lg170436-6897_001/000013_mprage-sag-T1-160sl-iPAT2',
 '20200130/fr190151-6735_001/0

In [None]:
# Transforming each folder of DICOM scans to the niix format

# Maps the 8-character folder id to the label used in the BIDS folder name
# (sub-<label>). It is empty here and has to be filled in before running the cell;
# folders without an entry are reported and skipped.
dict_id_to_nip = {}
for sub in output_folder.iterdir():
    identifiable = str(sub)[-8:]
    # A dict is indexed, not called: `dict_id_to_nip(identifiable)` raised a TypeError
    nip = dict_id_to_nip.get(identifiable)
    if nip is None:
        print(f"No BIDS label known for {identifiable}, skipping")
        continue

    dicom_folder_sub = output_folder / identifiable  # where the DICOM files of a sub are stored
    output_bids = bids_folder / f'sub-{nip}/ses-01/anat/'  # where the niix file should be stored (BIDS)
    # Create the output folder up front so that the converter has an existing folder to write into
    output_bids.mkdir(parents=True, exist_ok=True)

    # `dicom_folder_sub` is already a full path: it is passed as is instead of being
    # appended to the anat folder a second time. The arguments are given as a list so
    # that no path is split on whitespace.
    result = subprocess.run(
        ["dcm2niix", "-o", str(output_bids), "-z", "y", "-b", "y", str(dicom_folder_sub)],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if result.returncode != 0:
        log = result.stdout.decode(errors="replace").strip()
        print(f"dcm2niix failed for {identifiable} (exit code {result.returncode}):\n{log}")

In [None]:
# Print the last 8 characters (the subject id) of every entry of the anat folder
for anat_dir in output_folder.iterdir():
    subject_id = str(anat_dir)[-8:]
    print(subject_id)

Open the `participants_to_import.tsv` file and populate it, then run `neurospin_to_bids` on it.

In [50]:
# Read the existing participants file; only its column names are used here
df__ = pd.read_csv('~/code/source-reconstruction/participants_to_import.tsv',delimiter='\t')
# Single empty row carrying the template's columns. Taking the columns straight
# from the template avoids hard-coding their number (previously `range(8)`),
# which failed as soon as the file had a different number of columns.
df = pd.DataFrame(index=[0], columns=df__.columns)
df

Unnamed: 0,participant_id,NIP,infos_participant,session_label,acq_date,acq_label,location,to_import
0,,,,,,,,


In [8]:
import sys
sys.path.append('/home/is153802/code/bids_formatting')
from CONST import dict_nip_to_sn

In [12]:
# Display the participant id -> scan number mapping
dict_pat_scan

{'cc150418': '2',
 'kc160388': '18',
 'pl170230': '2',
 'ya170284': '2',
 'lg170436': '13',
 'ml180010': '4',
 'mf180020': '2',
 'mb180076': '25',
 'fr190151': '24',
 'cl190429': '2',
 'lq180242': '18',
 'js180232': '22',
 'ae140329': '25',
 'lp090137': '2',
 'vc200442': '20',
 'rt220104': '14',
 'ap220150': '2',
 'hg220389': '2',
 'ml100438': '14',
 'ml220421': '14',
 'kp210315': '2',
 'cl220500': '16',
 'eg220435': '14'}

In [16]:
# Build one single-column frame per subject holding the 8 fields of a row of the
# participants file, then join them side by side in a single concat. Doing it once
# avoids growing a frame inside the loop, starting from an empty frame.
subject_columns = []
for sub in subjects:
    # Exact key lookup in both dictionaries. The former
    # `any(sub in key for key in dict_pat_dat)` was a substring test on the keys,
    # which could pass for an id that is not a key and then fail on `dict_pat_dat[sub]`.
    if sub in dict_pat_dat and sub in dict_pat_scan:
        sub_ = sub[:2] + '_' + sub[2:]  # key format used to index dict_nip_to_sn, e.g. 'ab_123456'
        raw_date = dict_pat_dat[sub]  # 'YYYYMMDD'
        date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
        subject_columns.append(
            pd.DataFrame([f'sub-{dict_nip_to_sn[sub_]}', sub, {}, '01', date, None, 'prisma', f"(('{dict_pat_scan[sub]}','anat','T1w'))"])
        )
    else:
        print(f'No anat for {sub} yet')

# `pd.concat` refuses an empty list, so fall back to an empty frame when no subject has a scan
dff = pd.concat(subject_columns, axis=1) if subject_columns else pd.DataFrame()

No anat for se210401 yet
No anat for df130078 yet
No anat for sa170217 yet
No anat for cj090334 yet
No anat for sf180213 yet


In [53]:
# Turn the per-subject columns into rows and give them the template's column names
dff_t = dff.T
dff_t.columns = df.columns
# Stack the rows below the empty template row, then drop that first placeholder row
final = pd.concat([df, dff_t]).iloc[1:, :]
final

Unnamed: 0,participant_id,NIP,infos_participant,session_label,acq_date,acq_label,location,to_import
0,sub-27,vc200442,{},1,2021-06-10,,prisma,"(('20','anat','T1w'))"
0,sub-9,js180232,{},1,2019-12-17,,prisma,"(('22','anat','T1w'))"
0,sub-2,ae140329,{},1,2020-09-18,,prisma,"(('25','anat','T1w'))"
0,sub-3,cc150418,{},1,2015-12-07,,prisma,"(('2','anat','T1w'))"
0,sub-11,kp210315,{},1,2022-08-01,,prisma,"(('2','anat','T1w'))"
0,sub-18,ml220421,{},1,2022-07-19,,prisma,"(('14','anat','T1w'))"
0,sub-17,ml180010,{},1,2018-01-16,,prisma,"(('4','anat','T1w'))"
0,sub-19,pl170230,{},1,2022-09-27,,prisma,"(('2','anat','T1w'))"
0,sub-16,ml100438,{},1,2022-06-21,,prisma,"(('14','anat','T1w'))"
0,sub-4,lp090137,{},1,2021-06-09,,prisma,"(('2','anat','T1w'))"


In [54]:
# Write the table as a tab-separated file, without the index column, into the
# current working directory.
# NOTE(review): the template was read from ~/code/source-reconstruction/ while this
# writes to './' — confirm that this is the intended location.
final.to_csv('./participants_to_import.tsv',sep='\t',index= False)