In [1]:
from pathlib import Path

"""
After running through a survey, these are visualizations you've found with dolphins
"""
# --- Configuration for this run -------------------------------------------
# Root of the survey imagery. Images are searched for recursively under
# SURVEY_DIR / SURVEY_TYPE / SURVEY_NAME / f"flight_{FLIGHT}".
SURVEY_DIR = Path("/workspaces/cv/data/surveys")
# Existing dataset: its all.json is loaded as the starting point and the files
# in its all/ folder are copied forward into the new dataset.
LAST_ANNOTATION_DIR = Path("/workspaces/cv/data/ml/annotated/202505-dolphins")
# Destination dataset: receives the updated all.json and the all/ image folder.
NEW_ANNOTATION_DIR = Path("/workspaces/cv/data/ml/annotated/202505v2-dolphins")
# Survey type / survey name / flight number. Besides selecting the folder to
# search, these are joined with "__" to build each image's file name in the
# dataset and are recorded in the image's "meta" entry.
SURVEY_TYPE = "action-aviation-multicamera"
SURVEY_NAME = "20250405_west_coast_4camera"
FLIGHT = 1
# Base names (not paths) of the survey images to add. Each one is expected to
# match exactly one *.JPG file under the flight folder.
DOLPHIN_FILENAMES = (
    "_09R3408.JPG",
    "_28L0468.JPG",
    "_28R5515.JPG",
    "_28R6505.JPG",
    "_28R6790.JPG",
)

In [6]:
import json

# Read the previous dataset's COCO-style index into memory. Later cells append
# the new images/annotations to this dict and write it out to the new dataset.
last_index_path = LAST_ANNOTATION_DIR / "all.json"
last = json.loads(last_index_path.read_text())


In [3]:
# Find new ones to annotate:
import shutil
from PIL import Image

# Working directory that the images needing annotation are staged into.
to_annotate_dir = Path("/workspaces/cv/.tmp/to_annotate")
# Refuse to reuse a non-empty working dir: it may hold annotations from a
# previous session that have not been merged yet.
if to_annotate_dir.exists() and any(to_annotate_dir.iterdir()):
    raise RuntimeError("Annotate dir exists and is not empty - delete it if you're happy you don't need it")
to_annotate_dir.mkdir(exist_ok=True, parents=True)

# Maps dataset (COCO) file name -> original survey image path, for every image
# that is not yet part of the existing dataset.
to_annotate = {}

# Find the images in the survey dir
flight_dir = SURVEY_DIR / SURVEY_TYPE / SURVEY_NAME / f"flight_{FLIGHT}"
all_files = list(flight_dir.rglob("*.JPG"))
print(len(all_files), "files in survey dir")

# Index the survey files by base name once, instead of rescanning the whole
# listing for every requested file name.
paths_by_name = {}
for survey_path in all_files:
    paths_by_name.setdefault(survey_path.name, []).append(survey_path)

for fname in DOLPHIN_FILENAMES:
    print(fname)
    matches = paths_by_name.get(fname, [])
    print(matches)
    # The base name must identify a single image within this flight.
    assert len(matches) == 1, f"expected exactly one file named {fname}, found {len(matches)}"
    imgpath = matches[0]

    # Does it exist? Dataset file names are prefixed with survey type, survey
    # name and flight so that they are unique across surveys.
    coco_fname = f"{SURVEY_TYPE}__{SURVEY_NAME}__flight_{FLIGHT}__{imgpath.name}"
    existing = [i for i in last["images"] if i["file_name"] == coco_fname]
    assert len(existing) <= 1, f"{coco_fname} appears {len(existing)} times in the existing dataset"

    # Use a context manager so the underlying file handle is released promptly
    # rather than being left open for every image processed.
    with Image.open(imgpath) as img:
        # Only landscape frames are expected (compares the stored pixel dimensions).
        assert img.width > img.height, f"{imgpath} is not landscape: {img.width}x{img.height}"
        if existing:
            print(fname, "already exists")
        else:
            print("=> Need to annotate", fname, coco_fname)
            # Remove orientation tag by opening/saving: the copy written here is
            # re-encoded by Pillow and no EXIF data is passed to save().
            img.save(to_annotate_dir / coco_fname)
            to_annotate[coco_fname] = imgpath

14952 files in survey dir
_09R3408.JPG
[PosixPath('/workspaces/cv/data/surveys/action-aviation-multicamera/20250405_west_coast_4camera/flight_1/cameras/R09/DCIM/10250405/_09R3408.JPG')]
=> Need to annotate _09R3408.JPG action-aviation-multicamera__20250405_west_coast_4camera__flight_1___09R3408.JPG
_28L0468.JPG
[PosixPath('/workspaces/cv/data/surveys/action-aviation-multicamera/20250405_west_coast_4camera/flight_1/cameras/L28/DCIM/10150405/_28L0468.JPG')]
=> Need to annotate _28L0468.JPG action-aviation-multicamera__20250405_west_coast_4camera__flight_1___28L0468.JPG
_28R5515.JPG
[PosixPath('/workspaces/cv/data/surveys/action-aviation-multicamera/20250405_west_coast_4camera/flight_1/cameras/R28/DCIM/10150405/_28R5515.JPG')]
=> Need to annotate _28R5515.JPG action-aviation-multicamera__20250405_west_coast_4camera__flight_1___28R5515.JPG
_28R6505.JPG
[PosixPath('/workspaces/cv/data/surveys/action-aviation-multicamera/20250405_west_coast_4camera/flight_1/cameras/R28/DCIM/10150405/_28R6505

In [None]:
# Right, go upload that to labelme, annotate, and copy back to tmp dir
#
# Manual step: this cell always raises AssertionError on purpose, so that
# "Run All" stops here. Before continuing with the cells below by hand, the
# annotation JSON for each staged image must be present in to_annotate_dir,
# named after the image with a .json extension (that is where the next cell
# reads it from).
assert False

In [7]:
from datetime import datetime

# Timestamp recorded on every image added in this run.
date_added = datetime.now().isoformat()

# Allocate fresh ids after the largest existing one. This also works when the
# existing ids are not a contiguous 0-based range, and avoids rebuilding the
# full id list on every iteration.
next_img_id = max((i["id"] for i in last["images"]), default=-1) + 1
next_annotation_id = max((i["id"] for i in last["annotations"]), default=-1) + 1
known_file_names = {i["file_name"] for i in last["images"]}

for coco_fname in to_annotate:
    # Guard against running this cell twice on the same in-memory dataset,
    # which would otherwise append duplicate image entries.
    if coco_fname in known_file_names:
        raise RuntimeError(f"{coco_fname} is already in the dataset - reload all.json before re-running this cell")

    new_img_id = next_img_id
    next_img_id += 1

    # Read the dimensions from the staged copy (the one that was annotated),
    # closing the file as soon as they are known.
    with Image.open(to_annotate_dir / coco_fname) as img:
        assert img.width > img.height, f"{coco_fname} is not landscape: {img.width}x{img.height}"
        width, height = img.width, img.height

    last["images"].append(
        {
            "id": new_img_id,
            "file_name": coco_fname,
            "width": width,
            "height": height,
            "meta": {
                "survey_type": SURVEY_TYPE,
                "survey": SURVEY_NAME,
                "flight": FLIGHT,
                "date_added_to_dataset": date_added,
            },
        }
    )
    known_file_names.add(coco_fname)

    # The annotation file sits next to the staged image with the extension
    # swapped for .json. Only the final suffix is stripped, so dots elsewhere
    # in the name do not truncate it.
    annotation_path = to_annotate_dir / f"{Path(coco_fname).stem}.json"
    with open(annotation_path) as f:
        annotation = json.load(f)

    # Convert each two-point rectangle into an [x, y, width, height] box.
    for shape in annotation["shapes"]:
        assert shape["label"] == "maui", f"unexpected label {shape['label']!r} in {annotation_path}"
        corner_a, corner_b = shape["points"]
        # The two corners can arrive in either order depending on how the box
        # was drawn; normalise so width and height are never negative.
        x_min, x_max = sorted((corner_a[0], corner_b[0]))
        y_min, y_max = sorted((corner_a[1], corner_b[1]))
        x0 = int(x_min)
        y0 = int(y_min)
        w = int(x_max - x_min)
        h = int(y_max - y_min)
        new_annotation_id = next_annotation_id
        next_annotation_id += 1
        last["annotations"].append(
            {"id": new_annotation_id, "image_id": new_img_id, "category_id": 0, "bbox": [x0, y0, w, h]}
        )

# Sanity checks on the merged dataset:
# image ids are unique,
assert len(last["images"]) == len(set(i["id"] for i in last["images"]))
# the number of distinct annotated image ids equals the number of images,
assert len(last["images"]) == len(set(i["image_id"] for i in last["annotations"]))
# and annotation ids are unique.
assert len(last["annotations"]) == len(set(i["id"] for i in last["annotations"]))

In [8]:
# Write the merged index to the new dataset directory (created if missing;
# its parent directory must already exist).
NEW_ANNOTATION_DIR.mkdir(exist_ok=True)
new_index_path = NEW_ANNOTATION_DIR / "all.json"
with new_index_path.open("w") as index_file:
    json.dump(last, index_file, indent=2)

In [9]:
# Build the image folder of the new dataset.
imgdir = NEW_ANNOTATION_DIR / "all"
imgdir.mkdir(parents=True, exist_ok=True)

# Carry over everything from the previous dataset's image folder...
previous_imgdir = LAST_ANNOTATION_DIR / "all"
for previous_img in previous_imgdir.iterdir():
    shutil.copy(previous_img, imgdir / previous_img.name)

# ...then add the new images. These are copied from the original survey files
# (the raw image), not from the re-saved copies in the annotation working dir,
# and are stored under their dataset file names.
for coco_fname, survey_path in to_annotate.items():
    shutil.copy(survey_path, imgdir / coco_fname)

# Check it all makes sense: the folder holds exactly one entry per indexed
# image, and every indexed file name is present on disk.
n_files_on_disk = sum(1 for _ in imgdir.iterdir())
assert n_files_on_disk == len(last["images"])
for image_entry in last["images"]:
    assert (imgdir / image_entry["file_name"]).exists()

# Save the annotation index alongside the images.
index_path = NEW_ANNOTATION_DIR / "all.json"
with index_path.open("w") as index_file:
    json.dump(last, index_file, indent=2)