# Create the files needed for preprocessing the PPMI dataset
## 1. Subject-session csv files for sdMRI

In [1]:
# libs and envs
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# Project root: '/scratch' on CC; the BIC alternative is '/data/pd/ppmi'.
proj_dir = '/scratch'
codes_dir_str = f'{proj_dir}/mr_proc'
# Make the mr_proc code directory importable.
sys.path.append(codes_dir_str)

# Main paths
codes_dir = Path(codes_dir_str)
fmriprep_dir = codes_dir.joinpath('fMRIPrep')

# Subject-session list: its column names and the CSV that stores it.
col_names = ['subject', 'session']
ppmi_subj_ses_file = fmriprep_dir.joinpath('ppmi_subject_session.csv')  # Information from download database.

In [2]:
# Load the existing subject-session list (a headerless CSV) and report
# which sessions it contains.
subj_ses_exist_df = pd.read_csv(
    ppmi_subj_ses_file,
    sep=',',
    header=None,
    index_col=None,
    names=col_names,
)
session_labels = [str(ses) for ses in subj_ses_exist_df['session'].unique()]
print('sessions in dataset: ', 'ses-' + ', ses-'.join(session_labels))
# subj_ses_exist_df  # uncomment to display the table

sessions in dataset:  ses-1, ses-7, ses-21, ses-5, ses-11, ses-91, ses-90, ses-0, ses-9, ses-30


In [3]:
# Create one session-specific JSON file per session found in the
# subject-session list.
# NOTE(review): presumably these are used as per-session filter files for
# fMRIPrep — confirm against the job scripts.
import json

save_file = 0  # the files are written only when this equals 1

basic_json_filename = 'anat_ses-?.json'  # '?' is replaced by the session label
session_json_dict = {
    "t1w": {
        "datatype": "anat",
        "session": "0",
        "suffix": "T1w",
    }
}

for session_id in subj_ses_exist_df.session.unique():
    ses_label = str(session_id)
    json_name = basic_json_filename.replace('?', ses_label)
    print(json_name)
    # The same dict is reused; only its session entry changes per iteration.
    session_json_dict['t1w']['session'] = ses_label
    json_text = json.dumps(session_json_dict, indent=4)
    if save_file == 1:
        (fmriprep_dir / json_name).write_text(json_text, encoding='utf-8')

anat_ses-1.json
anat_ses-7.json
anat_ses-21.json
anat_ses-5.json
anat_ses-11.json
anat_ses-91.json
anat_ses-90.json
anat_ses-0.json
anat_ses-9.json
anat_ses-30.json


## 2. Subject-session csv files for livingpark

In [5]:
save_file = 0  # set to a truthy value to write the CSV

livingpark_subj_metadata_file = codes_dir.joinpath('tab_data', 'PPMI_livingpark_dcminfo.csv')  # Information from download database.
livingpark_subj_session_file = fmriprep_dir.joinpath('livingpark_subject_session.csv')

livingpark_col_names = ['Subject', 'Visit']
# Keep only the Subject/Visit columns, one row per unique pair.
livingpark_subj_ses_df = (
    pd.read_csv(livingpark_subj_metadata_file, sep=',')[livingpark_col_names]
    .drop_duplicates()
)
# Prefix every subject value with 'sub-'.
livingpark_subj_ses_df['Subject'] = [
    'sub-' + str(subject) for subject in livingpark_subj_ses_df['Subject']
]

# Written without header or index: one "subject,visit" pair per line.
if save_file:
    livingpark_subj_ses_df.to_csv(livingpark_subj_session_file, index=False, header=False)
# livingpark_subj_ses_df  # uncomment to display the table

## 3. Rerun fMRIPrep for the sdMRI subjects that failed

In [15]:
save_file = 1  # truthy: the rerun list is written to disk

# NOTE(review): judging by its name this is the error log of the first
# fMRIPrep run — confirm how it is produced.
sdMRI_err_file = fmriprep_dir.joinpath('err_fmriprep_run1.log')
sdMRI_rerun1_ses_file = fmriprep_dir.joinpath('sdMRI_subject_session_rerun1.csv')

subj_ses_col_names = ['Subject', 'Visit']
# Split every log line on '_' and keep the fields at positions 2 and 3
# (zero-based) as Subject and Visit, one row per unique pair.
sdMRI_rerun1_df = pd.read_csv(
    sdMRI_err_file,
    sep='_',
    header=None,
    usecols=[2, 3],
    names=subj_ses_col_names,
).drop_duplicates()

# Written without header or index: one "subject,visit" pair per line.
if save_file:
    sdMRI_rerun1_df.to_csv(sdMRI_rerun1_ses_file, index=False, header=False)
sdMRI_rerun1_df

Unnamed: 0,Subject,Visit
0,16644,1
1,16644,5
2,3104,5
3,3106,5
4,3107,91
...,...,...
183,55282,1
184,59483,11
185,59503,7
186,59507,5


## Code below for testing: 

In [None]:
# Build the BIDS layout of the dataset and print every session it contains.
# NOTE(review): bids_dir is not defined in any of the cells above — it must
# be set to the BIDS dataset root before this cell is run.
from bids import BIDSLayout

ppmi_layout = BIDSLayout(bids_dir)
ppmi_sessions = ppmi_layout.get_sessions()
print(ppmi_sessions)

In [None]:
# Select all the available T1w images and create the subject-session list
# from their file names.
save_file = 0  # set to a truthy value to overwrite the subject-session CSV

suffix = 'T1w'
extension = 'nii.gz'
ppmi_file_list = ppmi_layout.get(suffix=suffix, extension=extension, return_type='file')
# Path(...).name yields the file name whatever the path separator is.
ppmi_file_names = [Path(x).name for x in ppmi_file_list]

# File names are assumed to look like '<subject>_ses-<session>_...': the first
# '_'-separated field is kept whole as the subject, the second is reduced to
# the text after its last '-'.
subj_ses_df = pd.DataFrame({
    col_names[0]: [name.split('_')[0] for name in ppmi_file_names],
    col_names[1]: [name.split('_')[1].split('-')[-1] for name in ppmi_file_names],
})
# Several T1w files can belong to the same subject/session pair; keep each
# pair only once, as the other subject-session lists in this notebook do.
subj_ses_df = subj_ses_df.drop_duplicates()
print(subj_ses_df)

# Generate the subject,session file for fMRIPrep preprocessing
if save_file:
    subj_ses_df.to_csv(ppmi_subj_ses_file, header=False, index=False)