# Compile MRIQC metrics into TSV files
This notebook should produce the same result as running MRIQC at the group level, but without having to use Singularity.

In [1]:
import json
import os.path as op
from glob import glob

import pandas as pd

In [2]:
def load_json(f):
    """Read a JSON file and return its decoded contents.

    Parameters
    ----------
    f : str
        Path to the JSON file to read.

    Returns
    -------
    data
        The decoded JSON document, exactly as produced by ``json.load``.
    """
    # JSON text is UTF-8; pass the encoding explicitly so decoding does not
    # depend on the platform's locale default.
    with open(f, "r", encoding="utf-8") as fo:
        data = json.load(fo)
    return data

In [3]:
# Project root; each dataset lives in its own directory beneath it.
base_dir = "/home/data/nbc/misc-projects/Salo_PowerReplication/"
# Location of the MRIQC derivatives, relative to a dataset directory.
sub_dir = "derivatives/mriqc/"
dsets = ["dset-dupre", "dset-cambridge", "dset-camcan", "dset-dalenberg", "dset-cohen"]
modalities = ["anat", "func"]

for dset in dsets:
    print(f"Processing {dset}")
    # One-row DataFrames (one per JSON file), grouped by output table name.
    frames_by_name = {}

    dset_dir = op.join(base_dir, dset)
    deriv_dir = op.join(dset_dir, sub_dir)
    sub_folders = sorted(glob(op.join(deriv_dir, "sub-*")))
    # The "sub-*" pattern can also match plain files; keep only directories.
    sub_folders = [sf for sf in sub_folders if op.isdir(sf)]
    for sub_folder in sub_folders:
        sub_id = op.basename(sub_folder)
        for mod in modalities:
            mod_folder = op.join(sub_folder, mod)
            jsons = sorted(glob(op.join(mod_folder, "*.json")))
            for json_file in jsons:
                # Table name = file name without its first "_"-separated
                # entity (the subject label) and without anything after the
                # first ".".
                json_mod = "_".join(op.basename(json_file).split("_")[1:]).split(".")[0]

                json_data = load_json(json_file)
                # Drop nested dictionaries; only top-level non-dict values
                # become columns.
                json_data = {k: v for k, v in json_data.items() if not isinstance(v, dict)}
                temp_df = pd.DataFrame(json_data, index=[sub_id])
                frames_by_name.setdefault(json_mod, []).append(temp_df)

    # Build each table with a single concat. DataFrame.append was removed in
    # pandas 2.0, and appending row by row re-copies the table every time.
    data_dict = {name: pd.concat(frames) for name, frames in frames_by_name.items()}

    for out_name, df in data_dict.items():
        out_file = op.join(deriv_dir, out_name + ".tsv")
        df.to_csv(out_file, sep="\t", index_label="participant_id")

Processing dset-dupre
Processing dset-cambridge
Processing dset-camcan
Processing dset-dalenberg
Processing dset-cohen
