In [9]:
import random
import _fixpath

import constants

# --- Configuration ---------------------------------------------------------
SOURCE_DIR = constants.ROOT_DIR / ".ignore" / "all_action_inferences"
DATA_DIR = constants.DATA_DIR / "fullsize" / "202404-false-positives-first-action-aviation-survey"
LABEL_ROOT = SOURCE_DIR / "annotations"
IMG_ROOT = SOURCE_DIR / "imgs"
TRAIN_PPN = 0.9  # proportion of samples assigned to the train split
SPLIT_SEED = 0  # fixed seed so the train/test split survives Restart & Run All

# --- Discover label / image files -------------------------------------------
labels = sorted(LABEL_ROOT.iterdir())
imgs = sorted(IMG_ROOT.iterdir())

print(len(labels))
print(len(imgs))

# Label files are named "<stem>.JPG.json"; image stems come from Path.stem.
label_stems = {p.name.replace(".JPG.json", "") for p in labels}
img_stems = {p.stem for p in imgs}
assert len(label_stems) == len(labels), "duplicate label stems"
assert len(img_stems) == len(imgs), "duplicate image stems"
assert not label_stems.symmetric_difference(img_stems), "labels and images do not pair up 1:1"

# --- Train / test split -----------------------------------------------------
n_train = int(len(label_stems) * TRAIN_PPN)
# random.sample() requires a sequence (sets raise TypeError on Python >= 3.11).
# Sorting first also makes the draw independent of set iteration order, and a
# dedicated seeded RNG keeps the split reproducible without touching global state.
train = set(random.Random(SPLIT_SEED).sample(sorted(label_stems), n_train))
test = label_stems - train
assert len(train) + len(test) == len(label_stems)
assert not train.intersection(test)

582
582


In [10]:
import json
from pathlib import Path
import shutil
from PIL import Image


def make_coco(split, dolphin_name, img_root, label_root, stems, odir: Path):
    """Write a COCO-style detection dataset for one split.

    For every stem, reads ``<img_root>/<stem>.JPG`` for its dimensions and
    ``<label_root>/<stem>.JPG.json`` for its boxes, then writes:

    * ``<odir>/<split>/`` -- copies of the images that parsed successfully
      (the directory is wiped and recreated on every call);
    * ``<odir>/<split>.json`` -- a dict with ``images``, ``annotations`` and
      ``categories`` keys.

    Each label file is read as a JSON list of objects with ``x0``, ``y0``,
    ``x1``, ``y1`` keys; boxes are emitted as ``[x0, y0, width, height]``.

    Args:
        split: Split name; used for the image sub-directory and JSON file name.
        dolphin_name: Name given to the single category (id 0).
        img_root: Directory containing the source ``.JPG`` images.
        label_root: Directory containing the ``.JPG.json`` label files.
        stems: Iterable of sample stems to include; processed in sorted order.
        odir: Output directory.

    Samples whose image or labels cannot be read are reported on stdout and
    left out of both the JSON and the image directory.
    """
    imgdir = odir / split
    # Start from an empty directory so files from a previous run cannot linger.
    if imgdir.exists():
        shutil.rmtree(imgdir)
    imgdir.mkdir(parents=True)

    all_images = []
    all_annotations = []
    for stem in sorted(stems):
        img_path = img_root / f"{stem}.JPG"
        label_path = label_root / f"{stem}.JPG.json"
        image_id = len(all_images)
        try:
            # Context manager releases the file handle as soon as the size is read.
            with Image.open(img_path) as img:
                width, height = img.size

            # Build this image's annotations in a scratch list so that a failure
            # part-way through leaves the global lists untouched.
            annotations = []
            with open(label_path) as f:
                for label in json.load(f):
                    x0 = int(label["x0"])
                    y0 = int(label["y0"])
                    x1 = int(label["x1"])
                    y1 = int(label["y1"])
                    w = x1 - x0
                    h = y1 - y0
                    annotations.append(
                        dict(
                            # Offset by the pending annotations so ids stay unique
                            # across boxes belonging to the same image.
                            id=len(all_annotations) + len(annotations),
                            image_id=image_id,
                            bbox=[x0, y0, w, h],
                            area=w * h,
                            category_id=0,
                        )
                    )
        except Exception as exc:  # best-effort: skip unreadable samples, but say why
            print(f"Failed to parse annotations for {stem}: {exc!r}")
            continue

        # Only commit (and copy the image) once everything parsed, so the image
        # directory and the JSON always describe the same set of samples.
        shutil.copy(img_path, imgdir)
        all_annotations.extend(annotations)  # flat list, as COCO expects
        all_images.append(dict(file_name=img_path.name, height=height, width=width, id=image_id))

    categories = [dict(id=0, name=dolphin_name)]
    with open(odir / f"{split}.json", "w") as f:
        json.dump(dict(images=all_images, annotations=all_annotations, categories=categories), f, indent=2)


# Export each split (train first, then test) under the same single category name.
for _split_name, _split_stems in (("train", train), ("test", test)):
    make_coco(_split_name, "false-positive", IMG_ROOT, LABEL_ROOT, _split_stems, DATA_DIR)