In [29]:
import os
import shutil
from random import shuffle
import yaml
import json
import cv2

In [30]:
# Root folder of one source dataset split (x is one of VARIANTS).
# Built with os.path.join so the separator matches the host OS instead of
# hard-coding Windows backslashes; on Windows the result is unchanged.
CONSTRUCT_BASE_FOLDER = lambda x: os.path.join(".", "scarecrow_dataset", x)

# Dataset splits to convert. Each is read from its own source sub-folder and
# written to a sub-folder of the same name under OUTPUT_FOLDER.
VARIANTS = ["train", "test", "val"]

# Destination root for the converted dataset.
OUTPUT_FOLDER = "./output"


In [31]:
# Create the folder structure: <OUTPUT_FOLDER>/<variant>/{labels,images}.
# exist_ok=True makes the cell safe to re-run and avoids the race between a
# separate exists() check and the creation call.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

for variant in VARIANTS:
    # One text label file per image is written here.
    annotations_folder = os.path.join(OUTPUT_FOLDER, variant, 'labels')
    os.makedirs(annotations_folder, exist_ok=True)

    # The source images are copied here.
    image_folder = os.path.join(OUTPUT_FOLDER, variant, 'images')
    os.makedirs(image_folder, exist_ok=True)

In [32]:
def construct_annotation_line(image_width: int, image_height: int, annotation: dict) -> str:
    """Convert one annotation record into a single YOLO label line.

    The bounding box is taken as the axis-aligned extent (min/max) of all
    corner points, so the result does not depend on the order in which the
    corners are listed.

    Args:
        image_width: Width of the annotated image, in the same units as the
            "X" coordinates.
        image_height: Height of the annotated image, in the same units as the
            "Y" coordinates.
        annotation: Record whose "Coordinates" entry is a non-empty sequence of
            points, each a mapping with numeric "X" and "Y" values.

    Returns:
        The string "0 <x_center> <y_center> <width> <height>", where the four
        values are divided by the image size. The class id is always 0.
    """
    # Corner points of the annotated box.
    coords = annotation["Coordinates"]
    xs = [point["X"] for point in coords]
    ys = [point["Y"] for point in coords]

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    # Convert to YOLO format: box centre plus box size.
    x_center = (x_min + x_max) / 2
    y_center = (y_min + y_max) / 2
    width = x_max - x_min
    height = y_max - y_min

    # Normalise by the image dimensions.
    x_center_norm = x_center / image_width
    y_center_norm = y_center / image_height
    width_norm = width / image_width
    height_norm = height / image_height

    return f"0 {x_center_norm} {y_center_norm} {width_norm} {height_norm}"

In [33]:
# Per-variant list of records, one per image:
# {"image": path/to/image, "annotations": str_including_newlines}
combination = {x: [] for x in VARIANTS}

for variant in VARIANTS:
    base_folder = CONSTRUCT_BASE_FOLDER(variant)
    annotations_file = os.path.join(base_folder, "annotations.json")

    with open(annotations_file, "r") as f:
        data = json.load(f)

    for image in data:
        image_path = str(os.path.join(base_folder, "images", image["OriginalFileName"]))

        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, so fail here with the offending path rather than
        # with an opaque AttributeError on `.shape`.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # shape is (rows, columns, ...), i.e. height first, then width.
        img_height, img_width = img.shape[:2]

        # One label line per annotation of this image.
        lines = [construct_annotation_line(img_width, img_height, x) for x in image["AnnotationData"]]

        comb = {"image": image_path, "annotations": "\n".join(lines)}
        combination[variant].append(comb)

In [34]:

# Copy every image and write its label file into the output structure.
for variant in VARIANTS:
    variant_output_folder = os.path.join(OUTPUT_FOLDER, variant)
    for comb in combination[variant]:
        # basename/splitext follow the host OS path rules and cope with names
        # that have no extension, unlike splitting on a literal backslash.
        image_name = os.path.basename(comb["image"])
        base_file_name = os.path.splitext(image_name)[0]

        # Copy the image unchanged (same file name and format as the source).
        output_path = os.path.join(variant_output_folder, "images", image_name)
        shutil.copy(comb["image"], output_path)

        # The label file shares the image's base name, with a .txt extension.
        label_path = os.path.join(variant_output_folder, "labels", base_file_name + ".txt")
        with open(label_path, "w") as f:
            f.write(comb["annotations"])

In [35]:
# Single class; id 0 is the class id written at the start of every label line.
category_names = {0: "bird"}

# Dataset description. The split entries name the per-variant image folders
# created under OUTPUT_FOLDER, so "val" must be "val/images" (the folder that
# is actually created and populated), not "valid/images".
content = {
    "nc": len(category_names),
    "train": "train/images",
    "test": "test/images",
    "val": "val/images",
    "names": category_names
}

with open(os.path.join(OUTPUT_FOLDER, "data.yaml"), "w") as f:
    yaml.dump(content, f)