In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Local directory holding the RF100-VL datasets. It is used below as the
# download target, scanned for dataset folders, and synced to S3.
# NOTE(review): hardcoded absolute Windows path - adjust per machine.
ROOT_PATH = "C:/Data/rf100-vl"

In [None]:
from pprint import pprint

from rf100vl.util import DATASET_TO_CATEGORY_JSON

# Show the dataset-name -> category mapping; its keys and values are used
# below as the download filters.
pprint(DATASET_TO_CATEGORY_JSON)

In [None]:
# Default selection: every known dataset name and every category it maps to.
# Trim either list to restrict what the download cell fetches.
datasets_to_download = [*DATASET_TO_CATEGORY_JSON.keys()]
categories_to_download = [*DATASET_TO_CATEGORY_JSON.values()]

In [None]:
from rf100vl import get_rf100vl_projects

# Fetch the RF100-VL project list and download the first project that passes
# both the category filter and the dataset-name filter. An empty filter list
# selects nothing.
projects = get_rf100vl_projects()

for project in projects:
    selected = (
        categories_to_download
        and project.category in categories_to_download
        and datasets_to_download
        and project.name in datasets_to_download
    )
    if selected:
        project.download(path=ROOT_PATH, overwrite=False)
        # NOTE(review): stops after the first matching project, so only one
        # dataset is downloaded per run - confirm this is intentional.
        break

In [None]:
import tlc

In [None]:
# Register the 3LC URL alias "RF100_ROOT" for the local data directory.
# NOTE(review): presumably this lets registered tables refer to files via the
# alias instead of the absolute path, and force=True presumably replaces an
# existing registration - confirm against the 3LC documentation.
tlc.register_url_alias("RF100_ROOT", ROOT_PATH, force=True)

In [None]:
from pathlib import Path

# Register one 3LC table per (dataset folder, split) found under ROOT_PATH.
# Problems with an individual dataset or split are reported and skipped so that
# one bad folder does not abort the whole run.
project_name = "RF100VL"
# iterdir() yields entries in arbitrary order; sort for reproducible output.
dataset_folders = sorted(f for f in Path(ROOT_PATH).iterdir() if f.is_dir())

for dataset in dataset_folders:
    # ROOT_PATH may contain directories that are not RF100-VL datasets;
    # skip them instead of failing with a KeyError.
    if dataset.name not in DATASET_TO_CATEGORY_JSON:
        print(f"Skipping {dataset.name}: no category known for this folder")
        continue

    # Categories may contain "/", which is replaced before use in the dataset name.
    DATASET_CATEGORY = DATASET_TO_CATEGORY_JSON[dataset.name].replace("/", "_")
    print(f"Registering tables for dataset {dataset.name} in category {DATASET_CATEGORY}")

    for split in ["train"]:  # ["train", "test", "valid"]:
        anno_path = dataset / split / "_annotations.coco.json"
        if not anno_path.exists():
            print(f"Annotation file {anno_path} does not exist")
            # Nothing to register for this split; do not call from_coco on a
            # missing file (it would only produce a second, misleading error).
            continue

        try:
            table = tlc.Table.from_coco(
                anno_path,
                table_name=split,
                dataset_name=f"{DATASET_CATEGORY}-{dataset.name}",
                project_name=project_name,
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next split.
            print(f"Error registering table {split} for dataset {dataset.name}: {e}")

In [None]:
# Local side of the sync.
# The project location is taken two levels above a run URL created for the project.
# NOTE(review): presumably that is the project's root directory - confirm.
run_url = tlc.Url.create_run_url(project_name=project_name)
local_project_url = run_url.parent.parent
local_data_url = tlc.Url(ROOT_PATH)

# Remote side of the sync: the project lives directly under the bucket, the
# data under "data/<last component of ROOT_PATH>".
s3_root = tlc.Url("s3://3lc-projects")
s3_project_url = s3_root / project_name
s3_data_url = s3_root / "data" / local_data_url.name

In [None]:
# Announce the source and destination of the project sync that follows.
print(
    "Copying project to S3..",
    f"  Local: {local_project_url}",
    f"  S3: {s3_project_url}",
    sep="\n",
)

In [None]:
!aws s3 sync {local_project_url} {s3_project_url}

In [None]:
# Announce the source and destination of the data sync performed below.
print(
    "Copying data to S3..",
    f"  Local: {local_data_url}",
    f"  S3: {s3_data_url}",
    sep="\n",
)

In [None]:
# Show the S3 data destination that the alias below will point to.
print(s3_data_url)

In [None]:
# Register "RF100_ROOT" as a project-level alias for `project_name`, pointing
# at the S3 data location. `root` is the parent of s3_project_url (built above
# as s3_root / project_name).
# NOTE(review): presumably this stores the alias with the S3 copy of the
# project so its tables resolve data from S3 - confirm against the 3LC docs.
tlc.register_project_url_alias(
    "RF100_ROOT",
    s3_data_url,
    project=project_name,
    root=s3_project_url.parent,
)

In [None]:
!aws s3 sync {local_data_url} {s3_data_url}