# Work with Test Data

In [4]:
from pathlib import Path
import os
import os.path as op
from pkg_resources import resource_filename as pkgrf
import shutil
import cubids
# Filesystem location of the "testdata" resource of the installed `cubids`
# package, resolved with pkg_resources.resource_filename (imported as `pkgrf`).
TEST_DATA = pkgrf("cubids", "testdata")

def test_data(tmp_path):
    """Copy the packaged test data into ``tmp_path / "testdata"``.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Existing directory that will receive the ``testdata`` folder.

    Returns
    -------
    pathlib.Path
        Path of the freshly created ``testdata`` copy.

    Raises
    ------
    AssertionError
        If the copy contains five or fewer entries (files and directories,
        counted recursively).
    """
    destination = tmp_path / "testdata"
    shutil.copytree(TEST_DATA, str(destination))
    # Sanity check that the copy actually brought files over.
    copied_entries = list(destination.rglob("*"))
    assert len(copied_entries) > 5
    return destination

# Current working directory at the time this cell runs; copy_testing_data()
# creates its output folders underneath it.
workdir = os.getcwd()

def copy_testing_data(dirname):
    """Create ``<workdir>/<dirname>`` and fill it with a fresh copy of the test data.

    The function is safe to re-run: an already existing ``dirname`` folder is
    reused, and a ``testdata`` copy left behind by an earlier call is removed
    before copying again. Without this, a second call fails because the
    directory already exists.

    Parameters
    ----------
    dirname : str
        Name of the folder to create under ``workdir``.

    Returns
    -------
    pathlib.Path
        The value returned by ``test_data`` (the copied ``testdata`` folder).
    """
    newdir = op.join(workdir, dirname)
    os.makedirs(newdir, exist_ok=True)
    # test_data() copies into "<newdir>/testdata" and shutil.copytree refuses
    # an existing destination, so drop any stale copy first. Only that one
    # sub-folder is removed; anything else in newdir is left alone.
    stale_copy = op.join(newdir, "testdata")
    if op.isdir(stale_copy):
        shutil.rmtree(stale_copy)
    return test_data(Path(newdir))

# copy the data
# Creates <workdir>/test1 and copies the packaged test data into it;
# `data_root` is the path returned by test_data().
data_root = copy_testing_data("test1")

In [3]:
# NOTE(review): manual cleanup cell. Its execution count (In [3]) is lower than
# that of the copy cell above (In [4]), i.e. it was last run *before* the copy.
# Run top-to-bottom it removes ./test1 right after it was created, and the
# recorded output paths further down point inside test1 — confirm whether this
# cell should sit here.
!rm -rf  test1

# Test the key / param groups

This test copies the data and makes sure we get the correct number of key and parameter groups out of it.


In [14]:
from cubids import CuBIDS

# `data_root` is the copied test data returned by copy_testing_data("test1")
# in the first cell. The name `first_test` used here before is never assigned
# anywhere in this notebook, so the cell failed on a fresh kernel.
bod = CuBIDS(str(data_root / "complete"))
# Last expression of the cell, so its return value is shown as the output.
bod._cache_fieldmaps()

100%|██████████| 6/6 [00:00<00:00, 268.30it/s]


[]

In [15]:
# Key groups reported for the dataset wrapped by `bod`; kept in `key_groups`
# so a later cell can compare them with those of a second dataset.
key_groups = bod.get_key_groups()
print(key_groups)

['acquisition-HASC55AP_datatype-dwi_suffix-dwi', 'acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1', 'acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2', 'acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff', 'datatype-anat_suffix-T1w', 'datatype-fmap_direction-PA_fmap-epi_suffix-epi', 'datatype-func_suffix-bold_task-rest']


In [19]:
# `data_root` is the copied test data returned by copy_testing_data("test1")
# in the first cell. The name `first_test` used here before is never assigned
# anywhere in this notebook, so the cell failed on a fresh kernel.
ibod = CuBIDS(str(data_root / "inconsistent"))
# NOTE(review): presumably the returned collection holds the fieldmaps whose
# intended targets could not be resolved — confirm against CuBIDS._cache_fieldmaps.
misfits = ibod._cache_fieldmaps()
# Displayed as the cell output: how many entries were returned.
len(misfits)

100%|██████████| 6/6 [00:00<00:00, 267.86it/s]


1

In [21]:
# Key groups reported for the dataset wrapped by `ibod`.
ikey_groups = ibod.get_key_groups()

In [22]:
# Compare the key groups of the two datasets; the boolean result is displayed
# as the cell output.
ikey_groups == key_groups

True

# Working with datalad

Here we try to initialize a datalad repo on the test data.

In [5]:
import datalad.api as dlapi

# `data_root` is the copied test data returned by copy_testing_data("test1")
# in the first cell. The name `first_test` used here before is never assigned
# anywhere in this notebook, so the cell failed on a fresh kernel.
# NOTE(review): the recorded run of this cell ended with
# "RuntimeError: Cannot run the event loop while another loop is running";
# that failure is not addressed here.
dl = dlapi.create(path=data_root / "inconsistent", force=True)

[INFO] Creating a new annex repo at /Users/mcieslak/projects/CuBIDS/notebooks/test1/testdata/inconsistent 


RuntimeError: Cannot run the event loop while another loop is running

In [5]:
# The call returns two objects, unpacked here as `files_df` and `summary_df`
# (presumably pandas DataFrames: a per-file table and a per-group summary —
# confirm against CuBIDS.get_param_groups_dataframes).
# NOTE(review): the summary shown further down lists key groups that do not
# appear in the key-group output printed earlier, so `bod` presumably wrapped a
# different dataset when this cell was last executed — confirm.
files_df, summary_df = bod.get_param_groups_dataframes()

In [23]:
# Opens a Qt console attached to this kernel (interactive debugging aid).
# NOTE(review): needs a GUI session; probably not meant to stay in the
# finished notebook.
%qtconsole

In [7]:
# Show only the group identifiers and the per-group count from the summary.
summary_df[["key_group", "ParamGroup", "Count"]]

Unnamed: 0,key_group,ParamGroup,Count
0,acquisition-64dir_datatype-dwi_suffix-dwi,1,3
1,acquisition-HCP_datatype-anat_suffix-T1w,1,3
2,acquisition-HCP_datatype-anat_suffix-T2w,1,1
3,acquisition-dwi_datatype-fmap_direction-AP_suf...,1,3
4,acquisition-dwi_datatype-fmap_direction-PA_suf...,1,3
5,acquisition-fMRI_datatype-fmap_direction-AP_su...,0,1
6,acquisition-fMRI_datatype-fmap_direction-AP_su...,1,1
7,acquisition-fMRI_datatype-fmap_direction-PA_su...,0,1
8,acquisition-fMRI_datatype-fmap_direction-PA_su...,1,1
9,datatype-func_run-1_suffix-bold_task-peer,1,2


In [None]:
import pandas as pd
# NOTE(review): `df` is not assigned in any visible cell and this cell has no
# execution count; it presumably stands for a per-file table with "FilePath",
# "key_group" and "ParamGroup" columns — confirm before running.
# Columns that describe a parameter group: everything except the file path.
param_group_cols = list(set(df.columns.to_list()) - set(["FilePath"]))
# One row per distinct combination of those columns.
uniques = df.drop_duplicates(param_group_cols, ignore_index=True)
print(uniques.shape)
# Number of rows in each (key_group, ParamGroup) pair.
counts = df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')
print(counts.shape)

# No `on=` is given, so the merge joins on the columns both frames share.
params_and_counts = pd.merge(uniques, counts)
print(params_and_counts.shape)

In [None]:
# NOTE(review): `no_paths` is not assigned in any visible cell — confirm where
# it comes from before running.
# Counts rows per (key_group, ParamGroup) pair. Both selected columns are used
# as grouping keys, so no value column is left for count() to report on.
no_paths[["key_group", "ParamGroup"]].groupby(["key_group", "ParamGroup"]).count()

In [None]:
# NOTE(review): `keyparam_df` is not assigned in any visible cell — confirm
# where it comes from before running.
# Number of rows per (key_group, ParamGroup) pair, as a flat table with a
# "Count" column.
keyparam_df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')

In [None]:
# Example file path written relative to a dataset root (no leading slash).
# NOTE(review): `fname` is not referenced by any other visible cell.
fname = 'sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz'

In [None]:
# Display the key groups of the dataset currently wrapped by `bod`.
bod.get_key_groups()

In [None]:
# Alias `bod` as `self`: the cells below refer to `self.layout` and
# `self.path` (presumably code lifted from a CuBIDS method body — confirm).
self = bod


In [None]:
# NOTE(review): star import; `defaultdict` below is not imported anywhere else
# in the visible notebook, so it presumably arrives through this line — confirm.
from cubids.cubids import *
# Alternation of fieldmap suffixes, used as a regular expression because
# regex_search=True is passed below.
suffix = '(phase1|phasediff|epi|fieldmap)'
# Relies on `self` having been aliased to a CuBIDS object in an earlier cell.
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])

# Empty mapping, created here but not filled in this cell.
files_to_fmaps = defaultdict(list)

# One fieldmap path per line.
print("\n".join([f.path for f in fmap_files]))

In [None]:
# The triple-quoted string below is a loop that has been disabled by turning
# it into a string literal; it is evaluated and discarded. The live code
# underneath performs the first steps of one iteration by hand, on the first
# fieldmap only, and prints the intermediate values.
"""
for fmap_file in tqdm(fmap_files):
    intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
    subject_prefix = "sub-%s/" % fmap_file.entities['subject']
    for intended_for in intentions:
        subject_relative_path = subject_prefix + intended_for
        files_to_fmaps[subject_relative_path].append(fmap_file)
"""
# Relies on `fmap_files` and `listify` being defined by earlier cells.
fmap_file = fmap_files[0]
intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
print("intentions:", intentions)
# Note the trailing slash in this variant of the prefix.
subject_prefix = "sub-%s/" % fmap_file.entities['subject']
print(subject_prefix)

In [None]:
# Build a lookup from each data file's full path to the fieldmaps that list it
# under "IntendedFor".
# Relies on `self`, `defaultdict`, `tqdm` and `listify` being defined by
# earlier cells; none of them is assigned or imported in this cell.
suffix = '(phase1|phasediff|epi|fieldmap)'
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])

files_to_fmaps = defaultdict(list)
for fmap_file in tqdm(fmap_files):
    # listify presumably turns a missing or single value into a list — confirm.
    intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
    subject_prefix = "sub-%s" % fmap_file.entities['subject']
    for intended_for in intentions:
        # Each "IntendedFor" entry is joined onto <dataset path>/sub-<subject>.
        full_path = Path(self.path) / subject_prefix / intended_for
        files_to_fmaps[str(full_path)].append(fmap_file)

In [None]:
# Print every data file followed by the fieldmaps associated with it, with all
# paths shown relative to the dataset root.
# The previous version cut a fixed 44 characters off each path, which only
# matches one particular absolute prefix; computing the relative path works
# wherever the dataset lives. `bod.path` is the same attribute another cell
# reads as `self.path` after aliasing `self = bod`.
dataset_root = bod.path
# The loop variable is deliberately not called `fmap_files`, so the list of
# fieldmap files built in an earlier cell is not overwritten.
for data_file, matched_fmaps in bod.fieldmap_lookup.items():
    print(op.relpath(data_file, dataset_root))
    for fmap_file in matched_fmaps:
        print("   ", op.relpath(fmap_file.path, dataset_root))

In [None]:
# Display the keys currently stored in the `files_to_fmaps` mapping.
files_to_fmaps.keys()

In [None]:
# NOTE(review): star import; `IMAGING_PARAMS` below is not defined anywhere in
# the visible notebook, so it presumably arrives through this line — confirm.
from cubids.cubids import *
# NOTE(review): machine-specific absolute paths; they only resolve on the
# original author's machine.
files = [
 '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz',
 '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']

# One dict of parameters per file, appended in the order of `files`.
dfs = []
fieldmap_lookup = bod.fieldmap_lookup
key_group_name = "test"
# path needs to be relative to the root with no leading prefix
# NOTE(review): the entries of `files` are absolute, which does not match the
# remark above — confirm which form the lookups expect.
for path in files:
    metadata = bod.layout.get_metadata(path)
    # Keep only the metadata fields that are also listed in IMAGING_PARAMS.
    wanted_keys = metadata.keys() & IMAGING_PARAMS
    example_data = {key: metadata[key] for key in wanted_keys}
    example_data["key_group"] = key_group_name

    # Get the fieldmaps out and add their types
    matched_fmaps = fieldmap_lookup[path]
    print(matched_fmaps)
    fieldmap_types = sorted(fmap.entities['fmap'] for fmap in matched_fmaps)
    for fmap_num, fmap_type in enumerate(fieldmap_types):
        example_data['fieldmap_type%02d' % fmap_num] = fmap_type

    # Expand slice timing to multiple columns
    SliceTime = example_data.get('SliceTiming')
    if SliceTime:
        # Round each slice time to one place after the decimal. A new list is
        # built on purpose: `SliceTime` is the very list object returned by
        # get_metadata(), and rounding it in place would also change the
        # metadata seen by any other holder of that list.
        rounded_slice_times = [round(slice_time, 1) for slice_time in SliceTime]
        example_data.update(
            {"SliceTime%03d" % slice_num: slice_time for
             slice_num, slice_time in enumerate(rounded_slice_times)})
        del example_data['SliceTiming']

    dfs.append(example_data)

In [None]:
# Display `example_data`; after the loop in the previous cell this is the dict
# built for the last entry of `files`.
example_data