Convert Inria dataset segmentation masks to COCO object detection annotations
https://project.inria.fr/aerialimagelabeling/contest/


In [None]:
import os
import json
import numpy as np
import rasterio
from skimage.measure import label, regionprops

# Convert every "<name>_label.tif" mask in `folder` into COCO-style detection
# annotations (one bounding box per connected foreground component) and write
# the result to "<folder>/annotations_coco.json". Each entry in "images"
# refers to the matching "<name>_train.tif" file.
folder = "/home/romeokienzler/Downloads/AerialImageDatasetTiledMergedFixedLabels_sample"
images = []
annotations = []
categories = [{"id": 1, "name": "object"}]

LABEL_SUFFIX = "_label.tif"
TRAIN_SUFFIX = "_train.tif"

# Filter BEFORE numbering so image ids are contiguous and do not depend on
# unrelated files (training tiles, a previously written JSON, ...) that
# happen to live in the same directory.
label_files = [
    name for name in sorted(os.listdir(folder)) if name.endswith(LABEL_SUFFIX)
]

# Ids are 1-based. pycocotools' COCOeval records the matched ground-truth id
# in arrays where 0 means "no match", so an annotation with id 0 would never
# be counted as a true positive.
ann_id = 1

for img_id, filename in enumerate(label_files, start=1):
    label_path = os.path.join(folder, filename)
    # Swap only the trailing suffix; str.replace on the whole path would also
    # rewrite any earlier occurrence of "_label.tif".
    train_name = filename[: -len(LABEL_SUFFIX)] + TRAIN_SUFFIX

    # Read the first band of the label raster; its shape is (rows, cols).
    with rasterio.open(label_path) as src:
        mask = src.read(1)
    height, width = mask.shape

    images.append({
        "id": img_id,
        "file_name": train_name,
        "width": width,
        "height": height,
    })

    # Pixels equal to 1 are treated as foreground; every connected component
    # becomes one object.
    # NOTE(review): the original Inria masks encode buildings as 255 - this
    # assumes the "FixedLabels" tiles were remapped to 0/1; confirm.
    labeled = label(mask == 1)
    for region in regionprops(labeled):
        # skimage reports (min_row, min_col, max_row, max_col) with exclusive
        # maxima; COCO expects [x, y, width, height].
        y_min, x_min, y_max, x_max = region.bbox
        bbox = [int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)]

        annotations.append({
            "id": ann_id,
            "image_id": img_id,
            "category_id": 1,
            "bbox": bbox,
            # Cast to a plain int so json.dump can serialise the value.
            "area": int(region.area),
            "iscrowd": 0,
        })
        ann_id += 1

# Build COCO dict
coco_output = {
    "images": images,
    "annotations": annotations,
    "categories": categories,
}

# Save JSON next to the tiles; the preview cells read it back from there.
with open(os.path.join(folder, "annotations_coco.json"), "w") as f:
    json.dump(coco_output, f, indent=2)

print(f"Created {len(annotations)} annotations for {len(images)} images.")


In [None]:
import os
import random
import json
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Preview: draw the COCO boxes on up to 10 randomly chosen images, one
# figure per image. Reads "annotations_coco.json" from `folder`.
json_path = os.path.join(folder, "annotations_coco.json")

with open(json_path, "r") as f:
    coco = json.load(f)

images, annotations = coco["images"], coco["annotations"]

# Group the annotations by the image they belong to, so the drawing loop
# does not have to scan the full list for every image.
ann_by_img = {}
for ann in annotations:
    image_key = ann["image_id"]
    if image_key not in ann_by_img:
        ann_by_img[image_key] = []
    ann_by_img[image_key].append(ann)


def _read_display_image(path):
    """Load a raster as an (H, W, 3) array min-max scaled for imshow."""
    with rasterio.open(path) as src:
        if src.count >= 3:
            bands = src.read([1, 2, 3])
        else:
            # Fewer than three bands: replicate band 1 into three channels.
            bands = np.repeat(src.read(1)[None, :, :], 3, axis=0)
    # Band-first -> band-last, which is the layout imshow expects.
    pixels = np.transpose(bands, (1, 2, 0))
    lo, hi = pixels.min(), pixels.max()
    # The small epsilon avoids a division by zero on constant images.
    return (pixels - lo) / (hi - lo + 1e-6)


def _draw_boxes(axis, anns):
    """Add one red, unfilled rectangle per annotation bbox ([x, y, w, h])."""
    for ann in anns:
        x, y, w, h = ann["bbox"]
        axis.add_patch(
            patches.Rectangle(
                (x, y), w, h,
                linewidth=2,
                edgecolor='red',
                facecolor='none'
            )
        )


# Show at most 10 images (all of them if there are fewer).
n_show = min(10, len(images))
for img_info in random.sample(images, n_show):
    img_path = os.path.join(folder, img_info["file_name"])
    if not os.path.exists(img_path):
        print(f"Missing image: {img_path}")
        continue

    img = _read_display_image(img_path)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.imshow(img)
    ax.set_title(img_info["file_name"])
    ax.axis("off")

    _draw_boxes(ax, ann_by_img.get(img_info["id"], []))

    plt.show()


In [None]:
import os
import random
import json
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Preview: draw the COCO boxes on up to 25 randomly chosen images laid out
# in a single 5x5 figure. Reads "annotations_coco.json" from `folder`.
json_path = os.path.join(folder, "annotations_coco.json")

with open(json_path, "r") as f:
    coco = json.load(f)

images = coco["images"]
annotations = coco["annotations"]

# image_id -> list of that image's annotations
ann_by_img = {}
for ann in annotations:
    owner = ann["image_id"]
    bucket = ann_by_img.get(owner)
    if bucket is None:
        bucket = ann_by_img[owner] = []
    bucket.append(ann)

# Grid geometry and the random sample that fills it
grid_rows, grid_cols = 5, 5
n_show = min(grid_rows * grid_cols, len(images))
sample_images = random.sample(images, n_show)

fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(20, 20))
axes = axes.flatten()

# n_show never exceeds the number of axes, so indexing by position pairs
# each sampled image with its own cell.
for cell, img_info in enumerate(sample_images):
    ax = axes[cell]
    img_path = os.path.join(folder, img_info["file_name"])

    if os.path.exists(img_path):
        with rasterio.open(img_path) as src:
            if src.count >= 3:
                bands = src.read([1, 2, 3])
            else:
                # Fewer than three bands: replicate band 1 into three channels.
                gray = src.read(1)
                bands = np.repeat(gray[None, :, :], 3, axis=0)

        # Band-first -> band-last, then min-max scale for display; the
        # epsilon avoids a division by zero on constant images.
        img = np.transpose(bands, (1, 2, 0))
        low, high = img.min(), img.max()
        img = (img - low) / (high - low + 1e-6)

        ax.imshow(img)
        ax.set_title(img_info["file_name"], fontsize=8)
        ax.axis("off")

        # One red outline per bounding box ([x, y, w, h])
        for ann in ann_by_img.get(img_info["id"], []):
            x, y, w, h = ann["bbox"]
            rect = patches.Rectangle(
                (x, y), w, h,
                linewidth=1.5,
                edgecolor='red',
                facecolor='none',
            )
            ax.add_patch(rect)
    else:
        # Image file is missing: leave the cell blank.
        ax.axis("off")

# Blank out the cells that received no image
for spare in axes[n_show:]:
    spare.axis("off")

plt.tight_layout()
plt.show()
