In [87]:
import re, json, os
from pathlib import Path
import subprocess
from datetime import datetime
from tqdm import tqdm
import pandas as pd

def get_sub_ses_acq_run_t1(t1_path):
    """
    Extract the sub/ses/acq/run entities from a T1w filename.

    Parameters
    ----------
    t1_path : pathlib.Path or str
        Path to (or bare name of) a T1w image, e.g.
        ``sub-257_ses-20111013_acq-MPRAGE_run-1_T1w.nii.gz``. Only the final
        path component is inspected.

    Returns
    -------
    tuple of str
        ``(sub, ses, acq, run)``, each including its prefix (``'sub-257'``).
        Entities that are absent from the filename are returned as ``''``.

    Raises
    ------
    ValueError
        If the filename does not follow
        ``sub-<label>[_ses-<label>][_acq-<label>][_run-<index>]_T1w``.
    """
    # The run index accepts any number of digits (e.g. zero-padded "run-001"),
    # not just one or two.
    pattern = r'(sub-\w+)(?:_(ses-\w+))?(?:_(acq-\w+))?(?:_(run-\d+))?_T1w'
    name = Path(t1_path).name
    match = re.search(pattern, name)
    if match is None:
        # Fail with the offending name instead of an opaque IndexError.
        raise ValueError(f"Not a recognised T1w filename: {name!r}")
    # default='' keeps the "missing entity is an empty string" convention.
    sub, ses, acq, run = match.groups(default='')
    return sub, ses, acq, run

In [88]:
# List every T1w image (regular file or symlink) found at depth 4 below the
# dataset root. The command is a raw string so the backslashes in "\(" and "\)"
# reach the shell literally; in a normal string literal "\(" is an invalid
# escape sequence.
find_cmd = r'find /nfs2/harmonization/BIDS/ABVIB -mindepth 4 -maxdepth 4 \( -type l -o -type f \) -name "*T1w.nii.gz"'
# stdout is split into one path per line; an empty result gives an empty list.
t1s = subprocess.run(find_cmd, shell=True, capture_output=True, text=True).stdout.strip().splitlines()

In [117]:
#l = t1s[:10]

def create_json_dict(filepaths):
    """
    Build the initial nested QA dictionary for a list of T1w file paths.

    Each path is parsed into its sub/ses/acq/run entities and stored under
    ``sub -> [ses] -> [acq] -> [run]``; an entity that is absent from the
    filename adds no nesting level. The innermost dictionary receives the
    default QA record (``QA_status``, ``reason``, ``user``, ``date``).

    Parameters
    ----------
    filepaths : iterable of str or pathlib.Path
        Paths of the T1w images to register.

    Returns
    -------
    dict
        The nested QA dictionary. Two paths that parse to the same entities
        share one record.
    """
    import getpass  # local import: only needed for the fallback below

    try:
        user = os.getlogin()
    except OSError:
        # os.getlogin() needs a controlling terminal, which a notebook kernel
        # may not have; fall back to the environment / password database.
        user = getpass.getuser()
    date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    nested_d = {}
    for t1 in tqdm(filepaths):
        sub, ses, acq, run = get_sub_ses_acq_run_t1(Path(t1))
        # Walk down from the root, creating one level per entity that is present.
        current_d = nested_d
        for tag in (sub, ses, acq, run):
            if tag:
                current_d = current_d.setdefault(tag, {})
        # Set the default values.
        current_d.update({'QA_status': 'yes', 'reason': '', 'user': user, 'date': date})

    return nested_d

def convert_json_to_csv(json_dict, out_csv='qa.csv'):
    """
    Flatten a nested QA dictionary into a table with one row per QA record.

    Parameters
    ----------
    json_dict : dict
        Nested QA dictionary keyed by ``sub-*`` / ``ses-*`` / ``acq-*`` /
        ``run-*``. A QA record is any dictionary in the tree that holds at
        least one non-dictionary value.
    out_csv : str, path-like or None, optional
        Where to write the CSV (default ``'qa.csv'``). Pass ``None`` to skip
        writing and only return the dataframe.

    Returns
    -------
    pandas.DataFrame
        Columns ``sub, ses, acq, run, QA_status, reason, user, date``; missing
        values are empty strings. An empty ``json_dict`` gives an empty frame
        with these columns.

    Raises
    ------
    AssertionError
        If a nesting key does not start with ``sub``, ``ses``, ``acq`` or ``run``.

    Notes
    -----
    The record dictionaries inside ``json_dict`` are updated in place: each one
    gains the entity keys of its path, plus empty ``ses`` / ``acq`` / ``run``
    entries when those are absent. ``read_csv_to_json`` builds records of the
    same shape, which is what lets the two structures be compared key by key.
    """
    tag_columns = ['sub', 'ses', 'acq', 'run']
    header = tag_columns + ['QA_status', 'reason', 'user', 'date']

    def get_tag_type(key):
        """Return the column ('sub', 'ses', 'acq' or 'run') a nesting key belongs to."""
        for tag in tag_columns:
            if key.startswith(tag):
                return tag
        raise AssertionError(f"Unknown tag type: {key}")

    def get_leaf_dicts(d, path=()):
        """Return ``(path, record)`` pairs for every QA record at or below ``d``."""
        found = []
        record_seen = False
        for key, value in d.items():
            if isinstance(value, dict):
                get_tag_type(key)  # fail early on an unexpected nesting key
                found.extend(get_leaf_dicts(value, path + (key,)))
            elif not record_seen:
                # First scalar value: this dictionary is itself a QA record.
                # Keep iterating (no break) so that nested dictionaries stored
                # next to the record fields are still visited.
                found.append((path, d))
                record_seen = True
        return found

    rows = []
    for path, record in get_leaf_dicts(json_dict):
        # Store the entities of the path on the record itself (in place).
        for key in path:
            record[get_tag_type(key)] = key
        for tag in ('run', 'acq', 'ses'):
            record.setdefault(tag, '')
        # Keep only the CSV columns; nested children of the record are not part of the row.
        rows.append({column: record.get(column, '') for column in header})

    # columns=header fixes the column order and keeps the header for empty input.
    df = pd.DataFrame(rows, columns=header)
    # Replace NaN with empty strings.
    df = df.fillna('')

    if out_csv is not None:
        df.to_csv(out_csv, index=False)

    return df

def read_csv_to_json(df):
    """
    Rebuild the nested QA dictionary from a QA dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per QA record with the columns ``sub, ses, acq, run,
        QA_status, reason, user, date``. Empty cells may be ``''`` or missing
        values (NaN / None), as produced by ``pandas.read_csv``.

    Returns
    -------
    dict
        Dictionary nested ``sub -> [ses] -> [acq] -> [run]``; an empty entity
        adds no level. Each record holds the four QA fields plus the entity
        values of its row; ``ses`` / ``acq`` / ``run`` are always present
        (``''`` when empty), while ``sub`` is only stored when non-empty.
    """
    # NaN is truthy, so without this an empty cell loaded from a CSV file would
    # be treated as a real entity and become a dictionary key.
    df = df.fillna('')

    json_data = {}
    for _, row in df.iterrows():
        current_d = json_data
        present = {}
        for tag in ('sub', 'ses', 'acq', 'run'):
            if row[tag]:
                current_d = current_d.setdefault(row[tag], {})
                present[tag] = row[tag]
        # Set the values.
        record = {
            'QA_status': row['QA_status'],
            'reason': row['reason'],
            'user': row['user'],
            'date': row['date'],
        }
        # Absent optional entities are stored explicitly as empty strings.
        for tag in ('run', 'acq', 'ses'):
            if tag not in present:
                record[tag] = ''
        record.update(present)
        current_d.update(record)

    return json_data

def compare_dicts(d1, d2):
    """
    Assert that every entry of ``d1`` is present, with an equal value, in ``d2``.

    The check is one-directional: keys that exist only in ``d2`` are ignored.
    Call it a second time with the arguments swapped to verify full equality.

    Parameters
    ----------
    d1, d2 : dict
        Possibly nested dictionaries; nested dictionaries are compared
        recursively.

    Raises
    ------
    AssertionError
        If a key of ``d1`` is missing from ``d2``, if a nested dictionary in
        ``d1`` corresponds to a non-dictionary in ``d2``, or if two values differ.
    """
    for key in d1:
        assert key in d2, f"Key {key} not in d2. d1: {d1} \n d2: {d2}"
        if isinstance(d1[key], dict):
            # Without this check an empty dict would silently "match" any value,
            # and a non-empty one would fail with an unrelated TypeError.
            assert isinstance(d2[key], dict), f"Value for key {key} is a dict in d1 but not in d2: {d2[key]}"
            compare_dicts(d1[key], d2[key])
        else:
            assert d1[key] == d2[key], f"Values for key {key} are different: {d1[key]} vs {d2[key]}"


# Round-trip check: nested dict -> dataframe / qa.csv -> nested dict.
# Statement order matters: convert_json_to_csv writes qa.csv and also adds the
# sub/ses/acq/run values to the leaf dictionaries of nested_d in place, so the
# comparisons below are made against the already-updated nested_d.
nested_d = create_json_dict(t1s)
df = convert_json_to_csv(nested_d)
converted_json = read_csv_to_json(df)

# Debugging aid: print both versions of every subject side by side.
# for x in converted_json:
#     print(nested_d[x])
#     print(converted_json[x])

# Debugging aid: inspect a single subject.
#print(nested_d['sub-2007'])
#print(converted_json['sub-2007'])

# compare_dicts only checks that its first argument is contained in the second,
# so both directions are needed to establish equality.
compare_dicts(nested_d, converted_json)
compare_dicts(converted_json, nested_d)

#dump the dictionary to a json file
# with open('qa1.json', 'w') as f:
#     json.dump(nested_d, f, indent=4)

# with open('qa2.json', 'w') as f:
#     json.dump(converted_json, f, indent=4)


100%|██████████| 75/75 [00:00<00:00, 56435.74it/s]


In [80]:
# Print the first ten entries of t1s, one per line, as a quick sanity check.
for x in t1s[:10]:
    print(x)

/nfs2/harmonization/BIDS/ABVIB/sub-7003/ses-20100408/anat/sub-7003_ses-20100408_acq-SPGR_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-201/ses-20090310/anat/sub-201_ses-20090310_acq-MPRAGE_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-3831/ses-20120508/anat/sub-3831_ses-20120508_acq-MPRAGE_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-61040/ses-20120110/anat/sub-61040_ses-20120110_acq-MPRAGE_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-257/ses-20111013/anat/sub-257_ses-20111013_acq-MPRAGE_run-1_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-257/ses-20111013/anat/sub-257_ses-20111013_acq-MPRAGE_run-2_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-2007/ses-20120320/anat/sub-2007_ses-20120320_acq-SPGR_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-7055/ses-20120712/anat/sub-7055_ses-20120712_acq-SPGR_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-60024/ses-20100909/anat/sub-60024_ses-20100909_acq-MPRAGE_T1w.nii.gz
/nfs2/harmonization/BIDS/ABVIB/sub-218/ses-20121108/anat/sub-218_ses-20121108_