In [None]:
%matplotlib widget
import fiftyone as fo
import fiftyone.utils.random as four
from fiftyone import ViewField as F

from tilcvtrainer import JSONLImporter

In [None]:
# Folder holding the raw data that is handed to the importer (relative path).
dataset_dir: str = "../data/raw"

In [None]:
# Build the importer up front, then either import the data once into a
# persistent dataset named "til24cvraw" or reuse it when it already exists.
importer = JSONLImporter(dataset_dir)
if not fo.dataset_exists("til24cvraw"):
    dataset = fo.Dataset.from_importer(
        importer,
        name="til24cvraw",
        persistent=True,
        overwrite=True,
    )
else:
    dataset = fo.load_dataset("til24cvraw")

In [None]:
# Create splits.
# The dataset is persistent and is re-loaded on later runs, so split tags from
# an earlier run may still be attached. Clear them first so that changing the
# fractions or the seed cannot leave samples tagged with both "train" and "val".
dataset.untag_samples(["train", "val"])
four.random_split(dataset, {"train": 0.8, "val": 0.2}, seed=42)

### Export to Ultralytics

In [None]:
# Collapse every detection's label into the single class "UFO" (lol).
# Per FiftyOne's set_field docs, the expression for an embedded field is
# evaluated relative to its parent document, hence the bare F("detections").
relabel_to_ufo = F("detections").map(F().set_field("label", "UFO"))
view: fo.DatasetView = dataset.set_field("ground_truth.detections", relabel_to_ufo)

In [None]:
# Export both splits into one shared YOLOv5-format directory.
# NOTE(review): this export_dir has no "../" prefix, unlike the other data
# paths in this notebook ("../data/...") — confirm the location is intended.
for split_name in ("train", "val"):
    view.match_tags(split_name).export(
        split=split_name,
        dataset_type=fo.types.YOLOv5Dataset,
        export_dir="data/til24ufo",
        label_field="ground_truth",
        classes=["UFO"],
        export_media=True,
        include_path=False,
    )

### To Caption Dataset

In [None]:
from fiftyone.core.patches import PatchesView

In [None]:
# Turn the "ground_truth" objects into a patches view, then give the patches
# their own 95/5 split: existing "train"/"val" tags are removed before the
# new random split is applied.
split_tags = ["train", "val"]
pats: PatchesView = dataset.to_patches("ground_truth")
pats.untag_samples(split_tags)
four.random_split(pats, {"train": 0.95, "val": 0.05}, seed=42)

In [None]:
# Output folder for each split of the patch export.
export_dirs = {
    "train": "../data/til24id/train",
    "val": "../data/til24id/val",
}

In [None]:
# Export each split's patches (media included, relative paths) as a
# FiftyOne image-classification dataset into that split's folder.
for split_name in ("train", "val"):
    pats.match_tags(split_name).export(
        dataset_type=fo.types.FiftyOneImageClassificationDataset,
        export_dir=export_dirs[split_name],
        label_field="ground_truth",
        export_media=True,
        abs_paths=False,
    )

In [None]:
# Re-load each exported split and write a CSV into the same folder, mapping
# the label to a "title" column and keeping "filepath" (absolute paths, no
# media copied).
for split in ("train", "val"):
    tmp_ds = fo.Dataset.from_dir(
        dataset_dir=export_dirs[split],
        dataset_type=fo.types.FiftyOneImageClassificationDataset,
    )
    try:
        tmp_ds.export(
            export_dir=export_dirs[split],
            export_media=False,
            abs_paths=True,
            dataset_type=fo.types.CSVDataset,
            fields={"ground_truth.label": "title", "filepath": "filepath"},
        )
    finally:
        # The dataset only exists to convert formats; delete it so repeated
        # runs do not pile up throwaway datasets in the FiftyOne database.
        tmp_ds.delete()

In [None]:
# Open the FiftyOne App on the patches view for visual inspection.
fo.launch_app(pats)

In [None]:
import pandas as pd

In [None]:
# Load the per-split label files from the export folders. The paths come from
# export_dirs (where the CSV export wrote them) instead of a hardcoded
# absolute checkout location, so the cell also works outside /workspaces.
train_df = pd.read_csv(f"{export_dirs['train']}/labels.csv")
val_df = pd.read_csv(f"{export_dirs['val']}/labels.csv")

In [None]:
# Rewrite each label file in place as tab-separated text (the ".csv" file
# name is kept). Paths come from export_dirs rather than a hardcoded absolute
# checkout location.
for split_name, split_df in (("train", train_df), ("val", val_df)):
    split_df.to_csv(f"{export_dirs[split_name]}/labels.csv", index=False, sep="\t")