# Evaluate completion of fMRIPrep on datasets

In [1]:
import os.path as op
from glob import glob

import pandas as pd

In [2]:
# Constants
# Root folder that contains one sub-directory per dataset.
project_dir = "/home/data/nbc/misc-projects/Salo_PowerReplication/"

# Path pattern for one echo of a partially preprocessed BOLD file.
# The doubled braces survive the first ``format`` pass as a literal "{sub}",
# which is filled in with a participant ID when the files are checked.
_bold_template = (
    "{{sub}}/func/{{sub}}_{entities}_echo-{echo}"
    "_space-scanner_desc-partialPreproc_bold.nii.gz"
)

# One row per dataset:
# (dataset folder name, filename entities preceding "echo-N", number of echoes)
_dset_specs = [
    ("dset-camcan", "task-movie", 5),
    ("dset-cambridge", "task-rest", 4),
    ("dset-dupre", "task-rest_run-1", 3),
    ("dset-dalenberg", "task-images", 3),
    ("dset-cohen", "task-bilateralfingertapping", 4),
]


def _expected_outputs(entities, n_echoes):
    """Return the per-subject output templates for one dataset.

    The list starts with the subject's HTML report ("{sub}.html") and is
    followed by one BOLD file template per echo, numbered from 1 to
    ``n_echoes``. Every entry still contains the "{sub}" placeholder.
    """
    bold_files = [
        _bold_template.format(entities=entities, echo=echo)
        for echo in range(1, n_echoes + 1)
    ]
    return ["{sub}.html"] + bold_files


# Maps each dataset folder name to the list of file templates (relative to the
# dataset's fMRIPrep derivatives folder) that must exist for every subject.
dsets = {
    name: _expected_outputs(entities, n_echoes)
    for name, entities, n_echoes in _dset_specs
}

In [3]:
# Find failed subjects
# A subject counts as failed when at least one of the dataset's expected
# output files is absent from the fMRIPrep derivatives folder.
for dset_name, templates in dsets.items():
    print(dset_name)
    dataset_root = op.join(project_dir, dset_name)
    fmriprep_dir = op.join(dataset_root, "derivatives/fmriprep/")

    if op.isdir(fmriprep_dir):
        # Participant IDs come from the dataset's participants.tsv.
        participants = pd.read_table(op.join(dataset_root, "participants.tsv"))
        incomplete = [
            subject
            for subject in participants["participant_id"].tolist()
            if any(
                not op.isfile(op.join(fmriprep_dir, template.format(sub=subject)))
                for template in templates
            )
        ]

        # Nothing beyond the dataset name is printed when every subject is complete.
        if incomplete:
            print("\t" + "\n\t".join(incomplete))
    else:
        # No derivatives folder for this dataset, so there is nothing to check.
        print("\tDataset not yet processed.")

dset-camcan
	Dataset not yet processed.
dset-cambridge
dset-dupre
dset-dalenberg
dset-cohen


In [4]:
# Find the missing files
# For each dataset that has an fMRIPrep derivatives folder, print the basename
# of every expected output file that does not exist on disk.
for dset, target_files in dsets.items():
    print(dset)
    dset_dir = op.join(project_dir, dset)
    deriv_dir = op.join(dset_dir, "derivatives/fmriprep/")
    if not op.isdir(deriv_dir):
        # No derivatives folder for this dataset, so there is nothing to check.
        print("\tDataset not yet processed.")
        continue

    # Participant IDs come from the dataset's participants.tsv.
    participants_file = op.join(dset_dir, "participants.tsv")
    participants_df = pd.read_table(participants_file)
    subject_list = participants_df["participant_id"].tolist()
    for sub in subject_list:
        for temp in target_files:
            # Fill the "{sub}" placeholder to get this subject's expected path.
            tf = op.join(deriv_dir, temp.format(sub=sub))
            if not op.isfile(tf):
                print("\t{}".format(op.basename(tf)))

dset-camcan
	Dataset not yet processed.
dset-cambridge
dset-dupre
dset-dalenberg
dset-cohen
