Suggested improvement: format the dataset into a suitable format for traiing the mega detector

IMPORTANT:

Preparing the dataset in YOLO txt labels and converting it into COCO format is an absolutely essential step for the success of this project. Initially, we underestimated the complexity of this task, but after thorough exploration of the relevant GitHub repositories, we gained a better understanding of the process. We decided to create a new COCO file from scratch, rather than filtering out the existing file, as we initially considered. However, we soon realized that generating labels directly from the segment masks was not the most efficient approach. This learning has guided us towards a more refined strategy moving forward.



## Preparing YOLO and MegaDetector format label files

In [31]:
# Mount Google Drive so files under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Copy the `dataset` folder from Drive into /content (IPython shell escape, not plain Python).
# NOTE(review): the cells that follow read from .../MyDrive/cod10k_filtered on Drive,
# not from /content/dataset — confirm this copy is still needed.
!cp -r /content/drive/MyDrive/dataset /content/

Mounted at /content/drive


In [32]:
import os
import cv2

# Generate one YOLO-format label file per segmentation mask.
#
# For every PNG mask in `mask_dir`, the external contours of the non-zero
# regions are extracted, each contour's bounding rectangle is normalised by the
# mask dimensions, and the resulting lines
#     "<class_id> <x_center> <y_center> <width> <height>"
# are written to `<labels_dir>/<mask basename>.txt`.
# A mask that yields no boxes produces an empty label file.

# Set updated paths
raw_image_dir = '/content/drive/MyDrive/cod10k_filtered/train_images_unzipped/Image'  # not read in this cell; kept so the name stays defined
mask_dir = '/content/drive/MyDrive/cod10k_filtered/mask_unzipped/GT_Object'
labels_dir = '/content/drive/MyDrive/cod10k_filtered/labels'

os.makedirs(labels_dir, exist_ok=True)

# YOLO class index written at the start of every label line
class_id = 0  # animal

# Contours with an area below this many pixels are treated as mask noise and
# skipped; this matches the `< 10` filter used for the COCO annotations.
min_contour_area = 10

# Sorted so that repeated runs process (and log) files in the same order.
for mask_filename in sorted(os.listdir(mask_dir)):
    # Case-insensitive so that e.g. ".PNG" masks are not silently ignored.
    if not mask_filename.lower().endswith('.png'):  # adjust extension if needed
        continue

    mask_path = os.path.join(mask_dir, mask_filename)

    # cv2.imread returns None (instead of raising) when the file cannot be decoded.
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        print(f"[ERROR] Could not read mask: {mask_filename}")
        continue

    # Boxes are normalised by the mask size.
    # NOTE(review): assumes each mask has the same pixel dimensions as its image — confirm.
    height, width = mask.shape

    # Outer contours of the non-zero (object) regions only; holes are ignored.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    yolo_annotations = []
    for contour in contours:
        if cv2.contourArea(contour) < min_contour_area:
            continue  # skip small blobs/noise

        x, y, w, h = cv2.boundingRect(contour)

        # Convert top-left/size in pixels to normalised centre/size.
        x_center = (x + w / 2) / width
        y_center = (y + h / 2) / height
        norm_w = w / width
        norm_h = h / height

        yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}")

    # Save .txt file with YOLO annotations (same basename as the mask)
    label_filename = os.path.splitext(mask_filename)[0] + '.txt'
    label_path = os.path.join(labels_dir, label_filename)

    with open(label_path, 'w') as f:
        f.write('\n'.join(yolo_annotations))

print("✅ YOLO label files created.")


✅ YOLO label files created.


In [None]:

import os
import cv2
import json
from tqdm import tqdm

# Build a COCO-format annotation file from the object masks.
#
# Each mask in `mask_dir` is paired with the image in `raw_image_dir` that has
# the same basename. Every external contour of the mask with an area of at
# least 10 pixels becomes one annotation (bbox, polygon segmentation, area) of
# the single "animal" category. The result is written to `output_json_path`.

# === Corrected Paths ===
raw_image_dir = '/content/drive/MyDrive/cod10k_filtered/train_images_unzipped/Image'
mask_dir = '/content/drive/MyDrive/cod10k_filtered/mask_unzipped/GT_Object'
output_json_path = '/content/drive/MyDrive/cod10k_filtered/coco/instances.json'

# Create the output folder if it doesn't exist
os.makedirs(os.path.dirname(output_json_path), exist_ok=True)

# === Initialize COCO Format Dictionary ===
coco = {
    "images": [],
    "annotations": [],
    "categories": [
        {"id": 1, "name": "animal"}
    ]
}

# Both counters start at 1 and are unique across the whole file.
annotation_id = 1
image_id = 1

# Map image basename -> image filename so a mask can find its image regardless
# of extension. Sorted so the mapping is deterministic.
image_files = {
    os.path.splitext(f)[0]: f
    for f in sorted(os.listdir(raw_image_dir))
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
}

# Process each mask. Sorting makes the image/annotation id assignment
# reproducible between runs (os.listdir order is arbitrary).
for mask_filename in tqdm(sorted(os.listdir(mask_dir)), desc="Processing masks"):
    if not mask_filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    base_name = os.path.splitext(mask_filename)[0]
    image_filename = image_files.get(base_name)

    if not image_filename:
        print(f"[SKIP] No matching image for mask: {mask_filename}")
        continue

    image_path = os.path.join(raw_image_dir, image_filename)
    mask_path = os.path.join(mask_dir, mask_filename)

    # Load image and mask; cv2.imread returns None on failure instead of raising.
    img = cv2.imread(image_path)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    if img is None:
        print(f"[ERROR] Could not read image: {image_filename}")
        continue
    if mask is None:
        print(f"[ERROR] Could not read mask: {mask_filename}")
        continue
    if cv2.countNonZero(mask) == 0:
        print(f"[SKIP] Empty mask: {mask_filename}")
        continue

    # Only the first two axes are needed; slicing works whatever the channel count.
    height, width = img.shape[:2]

    # Boxes are measured in mask pixels but recorded against the image size,
    # so a size mismatch would produce misplaced annotations.
    if mask.shape[:2] != (height, width):
        print(f"[SKIP] Mask/image size mismatch: {mask_filename}")
        continue

    # === Register image ===
    coco["images"].append({
        "id": image_id,
        "file_name": image_filename,
        "width": width,
        "height": height
    })

    # === Get contours from mask ===
    # Outer contours of the non-zero regions only; holes are ignored.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        contour_area = cv2.contourArea(contour)
        if contour_area < 10:
            continue  # skip small blobs/noise

        x, y, w, h = cv2.boundingRect(contour)
        # Flattened [x1, y1, x2, y2, ...] polygon of the contour points.
        segmentation = contour.flatten().tolist()

        coco["annotations"].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": 1,  # animal
            "bbox": [float(x), float(y), float(w), float(h)],  # [x, y, width, height] in pixels
            # Area of the stored polygon, not of the bounding box.
            "area": float(contour_area),
            "iscrowd": 0,
            "segmentation": [segmentation]
        })

        annotation_id += 1

    image_id += 1

# === Save COCO JSON ===
with open(output_json_path, 'w') as f:
    json.dump(coco, f, indent=4)

print(f"\n✅ COCO annotations saved to: {output_json_path}")


Processing masks:  40%|███▉      | 2090/5242 [40:06<57:34,  1.10s/it]