In [None]:
%matplotlib widget
import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.iou as foui
from fiftyone import ViewField as F

from tilcvtrainer import JSONLImporter

In [None]:
# Root directory of the raw data; handed to JSONLImporter when the dataset is
# imported.
dataset_dir = "../data/raw"

In [None]:
# Drop any previously persisted copy so the import starts from a clean slate.
# Guarded because fo.delete_dataset() raises when no dataset with that name
# exists, which would abort a fresh "Restart Kernel -> Run All".
if fo.dataset_exists("til24cvraw"):
    fo.delete_dataset("til24cvraw")

In [None]:
# Import the raw data and persist it as "til24cvraw" unless a dataset with
# that name is already registered, in which case that one is loaded instead.
importer = JSONLImporter(dataset_dir)
if not fo.dataset_exists("til24cvraw"):
    dataset = fo.Dataset.from_importer(
        importer,
        name="til24cvraw",
        persistent=True,
        overwrite=True,
    )
else:
    dataset = fo.load_dataset("til24cvraw")

In [None]:
# Collapse every ground-truth class into the single label "UFO". The result is
# a view over `dataset`; downstream cells use `view` wherever the relabelled
# detections are needed.
relabel_as_ufo = F("detections").map(F().set_field("label", "UFO"))
view: fo.DatasetView = dataset.set_field("ground_truth.detections", relabel_as_ufo)

In [None]:
# Compute max-IoU statistics for the ground-truth detections (read back from
# the `max_iou` attribute in the next cell).
foui.compute_max_ious(dataset, "ground_truth")
num_objects = F("ground_truth.detections").length()

# The `(min, max)` number of ground-truth objects per sample
print(dataset.bounds(num_objects))

In [None]:
# The `(min, max)` of the `max_iou` attribute on the ground-truth detections.
print(dataset.bounds(F("ground_truth.detections.max_iou")))

In [None]:
# Tag samples as "train" (98%) or "val" (2%); the fixed seed keeps the split
# reproducible across runs.
split_fractions = {"train": 0.98, "val": 0.02}
four.random_split(dataset, split_fractions, seed=42)

### Export to Ultralytics

In [None]:
# Export each tagged split of the relabelled view in YOLOv5 format; both
# splits go to the same export directory and share the single class "UFO".
# NOTE(review): this export path is relative to the notebook's working
# directory, unlike the "../data/..." paths used elsewhere -- confirm intended.
yolo_export_options = dict(
    export_dir="data/til24ufo",
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="ground_truth",
    classes=["UFO"],
    export_media=True,
    include_path=False,
)
for split in ("train", "val"):
    v: fo.DatasetView = view.match_tags(split)
    v.export(split=split, **yolo_export_options)

### Predict YOLO on Dataset

In [None]:
from fiftyone.utils.ultralytics import (
    FiftyOneYOLODetectionModel,
    FiftyOneYOLODetectionModelConfig,
)
from ultralytics import YOLO
from functools import partial

# Workaround to set the confidence level (and the other inference options):
# pin them on `yolo.predict` with a partial and hand that callable to the
# FiftyOne wrapper as its "model".
yolo = YOLO(
    "/workspaces/til24-cv-trainer/runs/detect/e256-e128ft-v3/weights/best.pt",
    # Ultralytics task identifiers are "detect", "segment", "classify", "pose"
    # and "obb"; "detection" is not one of them.
    task="detect",
)
# conf: minimum confidence to keep a box; iou: NMS IoU threshold;
# imgsz: inference image size; half: FP16 inference; agnostic_nms: run NMS
# across classes rather than per class.
wrapped = partial(
    yolo.predict, conf=0.05, iou=0.0, imgsz=1536, half=True, agnostic_nms=True
)
mcfg = FiftyOneYOLODetectionModelConfig({"model": wrapped})
model = FiftyOneYOLODetectionModel(mcfg)

# model = convert_ultralytics_model(
#     YOLO(
#         "/workspaces/til24-cv-trainer/runs/detect/e256-e128ft-v3/weights/best.pt",
#         task="detection",
#     )
# )

In [None]:
# Run the wrapped YOLO model over the relabelled view and store its output in
# the "predictions" field.
view.apply_model(model, label_field="predictions")

In [None]:
# Score "predictions" against "ground_truth" on the relabelled view, using
# "eval_predictions" as the evaluation key, then print the summary report.
results = view.evaluate_detections(
    "predictions", gt_field="ground_truth", eval_key="eval_predictions"
)
results.print_report()

In [None]:
# Open the FiftyOne App on the relabelled view for visual inspection.
fo.launch_app(view)

### To Caption Dataset

In [None]:
from fiftyone.core.patches import PatchesView

In [None]:
# Build a patches view from the ground-truth detections, clear any existing
# "train"/"val" tags on it, and draw a fresh 95/5 split at the patch level.
patch_split_fractions = {"train": 0.95, "val": 0.05}
pats: PatchesView = dataset.to_patches("ground_truth")
pats.untag_samples(["train", "val"])
four.random_split(pats, patch_split_fractions, seed=42)

In [None]:
# Output directory for each split of the patch dataset.
export_dirs = {
    "train": "../data/til24id/train",
    "val": "../data/til24id/val",
}
splits = ("train", "val")
# Paired positionally with `splits` and passed as `alpha` when the patches are
# exported: 0.5 for "train", 0.0 for "val".
padding = (0.5, 0.0)

In [None]:
# Export each split's patches as a FiftyOne image-classification dataset with
# PNG media. The split's `padding` entry is forwarded as `alpha`
# (presumably the relative expansion applied to each box before cropping --
# confirm against the FiftyOne export docs).
patch_export_options = dict(
    export_media=True,
    abs_paths=False,
    label_field="ground_truth",
    image_format=".png",
    dataset_type=fo.types.FiftyOneImageClassificationDataset,
)
for split, p in zip(splits, padding):
    v: fo.DatasetView = pats.match_tags(split)
    v.export(export_dir=export_dirs[split], alpha=p, **patch_export_options)

In [None]:
# Re-load each exported split from disk and write a CSV into the same
# directory with two columns: the label (as "title") and the absolute
# "filepath". Media is not copied again.
for split in ("train", "val"):
    split_dir = export_dirs[split]
    tmp_ds = fo.Dataset.from_dir(
        dataset_dir=split_dir,
        dataset_type=fo.types.FiftyOneImageClassificationDataset,
    )
    tmp_ds.export(
        export_dir=split_dir,
        dataset_type=fo.types.CSVDataset,
        fields={"ground_truth.label": "title", "filepath": "filepath"},
        abs_paths=True,
        export_media=False,
    )

In [None]:
# Open the FiftyOne App on the patches view for visual inspection.
fo.launch_app(pats)

In [None]:
import pandas as pd

In [None]:
# Load the per-split label tables (presumably the labels.csv files produced by
# the CSV export -- confirm the absolute paths match `export_dirs`).
# NOTE(review): the following cell rewrites these same files tab-separated, so
# re-running this cell afterwards parses them with the wrong delimiter.
train_df = pd.read_csv("/workspaces/til24-cv-trainer/data/til24id/train/labels.csv")
val_df = pd.read_csv("/workspaces/til24-cv-trainer/data/til24id/val/labels.csv")

In [None]:
# Overwrite each labels.csv in place as a tab-separated file (the ".csv"
# extension is kept) without the DataFrame index.
for frame, path in (
    (train_df, "/workspaces/til24-cv-trainer/data/til24id/train/labels.csv"),
    (val_df, "/workspaces/til24-cv-trainer/data/til24id/val/labels.csv"),
):
    frame.to_csv(path, sep="\t", index=False)

### Noised Dataset

In [None]:
from tqdm.contrib.concurrent import process_map
import tqdm.notebook
import albumentations as A
from pathlib import Path
import cv2
import os

In [None]:
# Image-degradation pipeline; every transform carries its own probability `p`.
T = [
    # A.Blur(p=0.01),
    # A.MedianBlur(p=0.01),
    # Contrast-only jitter: the brightness limit is pinned to 0.
    A.RandomBrightnessContrast(
        brightness_limit=0,
        contrast_limit=(0.0, 0.3),
        p=0.7,
    ),
    A.AdvancedBlur(
        blur_limit=(3, 17),
        noise_limit=(0.0, 2.0),
        beta_limit=(0.0, 4.0),
        p=0.4,
    ),
    A.MotionBlur(blur_limit=(3, 17), p=0.4),
    # A.RandomGamma(p=0.0),
    # A.ToGray(p=0.0),
    # Compression quality between 20 and 70.
    A.ImageCompression(quality_lower=20, quality_upper=70, p=0.6),
    A.CLAHE(p=0.25),
    A.GaussNoise(var_limit=(1000.0, 5000.0), per_channel=True, p=0.5),
    A.ISONoise(intensity=(0.1, 0.5), color_shift=(0.03, 0.06), p=0.5),
]
augment = A.Compose(T)

In [None]:
# Source directory scanned for images to augment, and the directory the
# augmented copies are written to.
im_dir = "/workspaces/til24-cv-trainer/data/raw/images"
out_dir = "/workspaces/til24-cv-trainer/data/raw/augmented"

In [None]:
# Make sure the output directory (and any missing parents) exists.
os.makedirs(out_dir, exist_ok=True)


def augment_one(pth):
    """Augment one image and save the result under ``out_dir``.

    Reads the image at ``pth``, converts it from OpenCV's BGR order to RGB,
    runs the module-level ``augment`` pipeline on it, converts back to BGR and
    writes the result to ``out_dir`` under the same file name.

    Args:
        pth: Path object of the source image (its ``.name`` is reused for the
            output file).

    Raises:
        OSError: If the image cannot be read or the result cannot be written.
    """
    img = cv2.imread(str(pth))
    # cv2.imread() signals failure by returning None rather than raising; fail
    # here with the offending path instead of with an opaque cvtColor error.
    if img is None:
        raise OSError(f"could not read image: {pth}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    aug = augment(image=img)["image"]
    aug = cv2.cvtColor(aug, cv2.COLOR_RGB2BGR)
    dst = Path(out_dir) / pth.name
    # cv2.imwrite() reports failure through its return value; without this
    # check a failed write would go unnoticed.
    if not cv2.imwrite(str(dst), aug):
        raise OSError(f"could not write image: {dst}")


# Augment every "*.jpg" in `im_dir` in parallel: one worker per CPU reported
# by os.cpu_count(), one image per task, with a notebook progress bar.
pths = list(Path(im_dir).glob("*.jpg"))
nprocs = os.cpu_count()
_ = process_map(
    augment_one,
    pths,
    max_workers=nprocs,
    chunksize=1,
    tqdm_class=tqdm.notebook.tqdm,
)

In [None]:
from PIL import Image
import numpy as np

# Sample image used to preview a single augmentation.
img = Image.open("/workspaces/til24-cv-trainer/data/raw/images/image_129.jpg")


def _wrap(aug):
    """Adapt a transform called as ``aug(image=array)`` to PIL images.

    Args:
        aug: Callable accepting an ``image=`` keyword array and returning a
            mapping whose ``"image"`` entry is the transformed array.

    Returns:
        A one-argument callable that converts its input to an array, applies
        ``aug`` and returns the result as a PIL image.
    """

    def apply(im):
        pixels = np.array(im)
        transformed = aug(image=pixels)["image"]
        return Image.fromarray(transformed)

    return apply


# Preview ISONoise applied unconditionally (p=1.0) to the sample image; the
# resulting image is the cell's output.
a = _wrap(A.ISONoise(p=1.0))

a(img)