In [None]:
from pathlib import Path
import os
# !pip install gcapi
import gcapi 
import json
import tqdm

In [None]:
# This is a helper script to upload the private testing sets to Grand Challenge.
# The testing sets follow the same structure as the public dataset.

In [None]:
# Provide an API token with write access.
# Export it as the GRAND_CHALLENGE_AUTHORIZATION environment variable instead of
# pasting it here, so the secret is never saved together with the notebook.
# NOTE(review): gcapi is understood to fall back to this same variable when it is
# given an empty token - confirm against the gcapi documentation.
API_TOKEN = os.environ.get("GRAND_CHALLENGE_AUTHORIZATION", "")

In [None]:
def map_case_content_to_interfaces(case):
    """Map the files of one case onto the archive interface slugs.

    Args:
        case: Indexable sequence of file paths. Positions 0-2 are JSON files
            (frame rate, magnetic field strength, scanned region); positions
            3 and 4 are the image series and the target file.

    Returns:
        dict: Interface slug -> value. The three JSON interfaces hold the
        parsed file content; the two image interfaces hold a one-element
        list containing the file's ``Path``.
    """
    json_slugs = ("frame-rate", "magnetic-field-strength", "scanned-region")
    image_slugs = ("mri-linac-series", "mri-linac-target")

    interfaces = {}
    # JSON interfaces are read and parsed right away
    for position, slug in enumerate(json_slugs):
        interfaces[slug] = json.loads(Path(case[position]).read_text())
    # image interfaces are only referenced by path, wrapped in a list
    for position, slug in enumerate(image_slugs, start=len(json_slugs)):
        interfaces[slug] = [Path(case[position])]
    return interfaces

In [None]:
# Set the GC archive to upload to.
# Each phase pairs the archive slug with its local dataset directory, so the
# two can never get out of sync; switch phases by changing PHASE only.
PHASES = {
    # preliminary testing phase
    "preliminary": ("trackrad2025-preliminary-testing-dataset", "../dataset/preliminary"),
    # final testing phase
    "final": ("trackrad2025-final-testing-dataset", "../dataset/testing"),
}
PHASE = "final"
ARCHIVE_SLUG, DATASET_DIR = PHASES[PHASE]

# override for testing this script
#DATASET_DIR = "../dataset/example"

# safety switch to prevent accidental uploads
# as uploads are NOT idempotent
perform_upload = False

In [None]:
# Collect the cases to upload: every sub-directory of the dataset
# directory counts as one case, processed in sorted order
case_ids = [
    case_id
    for case_id in sorted(os.listdir(DATASET_DIR))
    if os.path.isdir(f"{DATASET_DIR}/{case_id}")
]


def _expected_case_files(case_id):
    """Return the five files expected for one case.

    Order: frame-rate, magnetic-field-strength, scanned-region,
    mri-linac-series, mri-linac-target.
    """
    return [
        f"{DATASET_DIR}/{case_id}/frame-rate.json",
        f"{DATASET_DIR}/{case_id}/b-field-strength.json",
        f"{DATASET_DIR}/{case_id}/scanned-region.json",
        f"{DATASET_DIR}/{case_id}/images/{case_id}_frames.mha",
        f"{DATASET_DIR}/{case_id}/targets/{case_id}_first_label.mha",
    ]


COLLECTED_CASES_FILES = [_expected_case_files(case_id) for case_id in case_ids]

In [None]:
# Sanity check: stop at the first expected file that does not exist
for case_files in tqdm.tqdm(COLLECTED_CASES_FILES):
    first_missing = next(
        (candidate for candidate in map(Path, case_files) if not candidate.exists()),
        None,
    )
    if first_missing is not None:
        raise RuntimeError(f"Could not find {first_missing.absolute()}")
    # also make sure the files can be mapped onto the interfaces
    content = map_case_content_to_interfaces(case_files)

In [None]:
# Upload every collected case as its own archive item.
# Nothing happens unless the safety switch `perform_upload` is enabled.
if perform_upload:
    client = gcapi.Client(token=API_TOKEN)
    # look up the target archive by its slug to obtain its API URL
    archive = client.archives.detail(slug=ARCHIVE_SLUG)
    archive_api_url = archive["api_url"]

    for case_files in tqdm.tqdm(COLLECTED_CASES_FILES):
        content = map_case_content_to_interfaces(case_files)
        # first create an archive item without values, then attach the case content to it
        archive_item = client.archive_items.create(archive=archive_api_url, values=[])
        client.update_archive_item(archive_item_pk=archive_item["pk"], values=content)

In [None]:
# Prepare the ground-truth archive (.tar.gz) using Python.
# It only contains
# {case_id}/targets/{case_id}_labels.mha

import tarfile

# Normalise first: with a trailing slash basename() would be empty and the
# archive would be written inside the dataset directory as "_gt.tar.gz".
_dataset_dir = os.path.normpath(DATASET_DIR)
filename = os.path.join(os.path.dirname(_dataset_dir), os.path.basename(_dataset_dir) + "_gt.tar.gz")

# Validate every label file BEFORE opening the archive, so a missing file
# cannot leave an incomplete ground-truth archive behind on disk.
gt_members = []
for case_id in case_ids:
    target = f"{DATASET_DIR}/{case_id}/targets/{case_id}_labels.mha"
    if not os.path.exists(target):
        raise RuntimeError(f"Could not find {target}")
    # (path on disk, path inside the archive)
    gt_members.append((target, f"{case_id}/targets/{case_id}_labels.mha"))

with tarfile.open(filename, "w:gz") as tar:
    for target, arcname in gt_members:
        tar.add(target, arcname=arcname)


In [None]:
# Check that the tar.gz is correct and matches the full dataset:
# each archived file must have the same size as the label file on disk,
# and the archived cases must be exactly the cases in `case_ids`.
archived_case_ids = []
with tarfile.open(filename, "r:gz") as tar:
    for member in tar.getmembers():
        # member names look like {case_id}/targets/{case_id}_labels.mha
        case_id = member.name.split("/")[0]
        size_on_disk = os.path.getsize(f"{DATASET_DIR}/{case_id}/targets/{case_id}_labels.mha")
        print(member.name, member.size, size_on_disk)
        if member.size != size_on_disk:
            raise RuntimeError(
                f"Size mismatch for {member.name}: {member.size} in archive, {size_on_disk} on disk"
            )
        archived_case_ids.append(case_id)

if sorted(archived_case_ids) != sorted(case_ids):
    raise RuntimeError(
        f"Archive {filename} holds {len(archived_case_ids)} cases, expected {len(case_ids)}"
    )