In [1]:
# Cell 1 — imports
import glob
import json
import os
import shutil
from pathlib import Path

from sklearn.model_selection import train_test_split


In [2]:
# Cell 2 — config
# Project root, relative to the kernel's working directory. ".." points one
# level up (e.g. when the notebook is run from notebooks/); adjust if the
# notebook is launched from somewhere else.
ROOT = Path("..")

# Dataset locations, all under <ROOT>/data.
RAW_DIR = ROOT.joinpath("data", "raw")
IMAGES_DIR = ROOT.joinpath("data", "images")
LABELS_DIR = ROOT.joinpath("data", "labels")
LABELLERR_EXPORT_DIR = ROOT.joinpath("data", "labellerr_export")

# Output folders must exist before later cells write into them.
IMAGES_DIR.mkdir(parents=True, exist_ok=True)
LABELS_DIR.mkdir(parents=True, exist_ok=True)


In [3]:
# Cell 3 — simple converter: COCO-like Labellerr export -> YOLO format
# Assumes labellerr_export contains JSON files per image or a single coco.json.
def convert_coco_to_yolo(coco_json_path, images_src_dir, yolo_labels_out_dir, classes=None):
    """Convert a COCO-style annotation file into per-image YOLO label files.

    Every COCO box ``[x, y, w, h]`` (top-left corner plus size, in pixels) is
    written as one line ``<class_idx> <cx> <cy> <w> <h>``, where the box centre
    and size are divided by the image width/height recorded in the JSON.

    Label files are rewritten from scratch on every call, so re-running the
    conversion produces the same files instead of appending duplicate lines.
    Only images that have at least one annotation get a label file.

    Args:
        coco_json_path: Path to a JSON file holding "images", "annotations"
            and "categories" lists.
        images_src_dir: Not used by the conversion; kept so existing calls
            keep working.
        yolo_labels_out_dir: Directory that receives one ``<image name without
            extension>.txt`` per annotated image (created if missing).
        classes: Optional ordered list of class names; the position of a name
            is its YOLO class index. Defaults to the category names ordered by
            COCO category id.

    Raises:
        KeyError: If an annotation references an unknown image id or category
            id, or a category whose name is not in ``classes``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(coco_json_path, "r", encoding="utf-8") as f:
        coco = json.load(f)

    images_by_id = {img["id"]: img for img in coco["images"]}
    categories = {c["id"]: c["name"] for c in coco["categories"]}
    if classes is None:
        classes = [categories[cid] for cid in sorted(categories.keys())]
    class_to_idx = {name: idx for idx, name in enumerate(classes)}

    yolo_labels_out_dir = Path(yolo_labels_out_dir)
    yolo_labels_out_dir.mkdir(parents=True, exist_ok=True)

    # Collect all lines per label file first so each file is opened once and
    # written in "w" mode (append mode duplicated boxes on every re-run).
    lines_by_label_file = {}
    for ann in coco["annotations"]:
        img = images_by_id[ann["image_id"]]
        img_w, img_h = img["width"], img["height"]
        x, y, w, h = ann["bbox"]  # [x, y, w, h], x/y = upper-left corner

        # YOLO stores the box centre and size as fractions of the image size.
        x_rel = (x + w / 2) / img_w
        y_rel = (y + h / 2) / img_h
        w_rel = w / img_w
        h_rel = h / img_h

        cls = class_to_idx[categories[ann["category_id"]]]
        name_noext = os.path.splitext(img["file_name"])[0]
        label_path = yolo_labels_out_dir / f"{name_noext}.txt"
        lines_by_label_file.setdefault(label_path, []).append(
            f"{cls} {x_rel:.6f} {y_rel:.6f} {w_rel:.6f} {h_rel:.6f}\n"
        )

    for label_path, lines in lines_by_label_file.items():
        with open(label_path, "w") as lf:
            lf.writelines(lines)


In [None]:
# Cell 4 — example usage (adjust paths)
# COCO export produced by Labellerr; the YOLO label files go to LABELS_DIR.
coco_json = LABELLERR_EXPORT_DIR.joinpath("coco_annotations.json")
convert_coco_to_yolo(
    coco_json_path=coco_json,
    images_src_dir=RAW_DIR / "images",
    yolo_labels_out_dir=LABELS_DIR,
)


In [None]:
# Cell 5 — split into train/val/test and write to data/images/ and data/labels/
# Suffixes treated as images. The "*.*" pattern alone also matches stray
# non-image files (e.g. Thumbs.db, exported .json/.txt) that must not be
# copied into a split.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# Sorted so the seeded split below is reproducible across runs and machines.
image_files = sorted(
    p
    for p in glob.glob(str(RAW_DIR / "images" / "*.*"))
    if Path(p).suffix.lower() in IMAGE_EXTENSIONS
)
if not image_files:
    # Fail with an actionable message instead of sklearn's generic error.
    raise FileNotFoundError(f"No image files found in {RAW_DIR / 'images'}")

# 70% train; the remaining 30% is halved into val and test (15% each).
train, rest = train_test_split(image_files, test_size=0.3, random_state=42)
val, test = train_test_split(rest, test_size=0.5, random_state=42)

def symlink_or_copy(src_list, dest_img_dir, dest_label_dir):
    """Copy images and their matching label files into split folders.

    Despite the name, files are always copied with ``shutil.copy``; no
    symlinks are created.

    Args:
        src_list: Iterable of image paths belonging to one split.
        dest_img_dir: Folder that receives the images (created if missing).
        dest_label_dir: Folder that receives the label files (created if
            missing).

    An image already present at its destination is left untouched. A label is
    looked up as ``LABELS_DIR/<image stem>.txt`` and, when it exists, is copied
    on every call (overwriting any previous copy).
    """
    for out_dir in (dest_img_dir, dest_label_dir):
        os.makedirs(out_dir, exist_ok=True)

    for image_path in map(Path, src_list):
        image_target = Path(dest_img_dir) / image_path.name
        if not image_target.exists():
            shutil.copy(image_path, image_target)  # copy safer for notebooks

        label_file = LABELS_DIR / f"{image_path.stem}.txt"
        if not label_file.exists():
            # Images without annotations simply get no label file.
            continue
        shutil.copy(label_file, Path(dest_label_dir) / label_file.name)

# Materialise each split under data/images/<split> and data/labels/<split>.
for split_name, split_files in (("train", train), ("val", val), ("test", test)):
    symlink_or_copy(
        split_files,
        ROOT / "data" / "images" / split_name,
        ROOT / "data" / "labels" / split_name,
    )
