In [40]:
from pycocotools.coco import COCO
import cv2
import os
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import json
from pprint import pprint
import shutil

In [38]:
# Paths to COCO data
image_path = "/Users/danielsarmiento/Downloads/train2017/"
annotation_file = "/Users/danielsarmiento/Downloads/annotations/instances_train2017.json"

# Load COCO annotations
coco = COCO(annotation_file)

loading annotations into memory...
Done (t=4.31s)
creating index...
index created!


In [41]:
output_images = "./images/"
output_labels = "./labels/"
classes_file = "classes.txt"

# Limits
MAX_IMAGES_NO_LABEL = 40_000
MAX_IMAGES_PERSON = 10_000
MAX_IMAGES_REST_OF_CATEGORIES = 40_000

# Create directories
os.makedirs(output_images, exist_ok=True)
os.makedirs(output_labels, exist_ok=True)

In [42]:
# Get all categories
categories = coco.loadCats(coco.getCatIds())
category_names = [cat['name'] for cat in categories]
pprint(f"Categories {len(category_names)}: {category_names}")

# Get image IDs and load an image
image_ids = coco.getImgIds()
pprint(f"Total images: {len(image_ids)}")

# Load a specific image
image_info = coco.loadImgs(image_ids[0])[0]
pprint(f"Image Info {image_info}")

("Categories 80: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', "
 "'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', "
 "'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', "
 "'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', "
 "'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', "
 "'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis "
 "racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', "
 "'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', "
 "'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining "
 "table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell "
 "phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', "
 "'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']")
'Total images: 118287'
("Image Info {'license': 3, 'file_name': '0000003918

In [44]:
# Load categories and save to `classes.txt`
categories = coco.loadCats(coco.getCatIds())
category_names = [cat['name'] for cat in categories]
category_name_to_id = {cat['name']: cat['id'] for cat in categories}
category_id_to_index = {cat['id']: idx for idx, cat in enumerate(categories)}

with open(classes_file, "w") as f:
    f.writelines(f"{name}\n" for name in category_names)

print(f"Classes saved to {classes_file}")

# Counters for image categories
no_label_count = 0
person_count = 0
rest_count = 0

# Process images
image_ids = coco.getImgIds()
for image_id in image_ids:
    image_info = coco.loadImgs(image_id)[0]
    img_annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    # Classify images based on annotations
    if not img_annotations:
        if no_label_count >= MAX_IMAGES_NO_LABEL:
            continue
        no_label_count += 1
    else:
        contains_person = any(ann['category_id'] == category_name_to_id["person"] for ann in img_annotations)
        if contains_person:
            if person_count >= MAX_IMAGES_PERSON:
                continue
            person_count += 1
        else:
            if rest_count >= MAX_IMAGES_REST_OF_CATEGORIES:
                continue
            rest_count += 1

    # Move image to output_images
    src_image_path = os.path.join(image_path, image_info['file_name'])
    dst_image_path = os.path.join(output_images, image_info['file_name'])
    shutil.copy(src_image_path, dst_image_path)

    # Create YOLO-format labels
    label_file_path = os.path.join(output_labels, f"{os.path.splitext(image_info['file_name'])[0]}.txt")
    with open(label_file_path, "w") as label_file:
        for ann in img_annotations:
            category_index = category_id_to_index[ann['category_id']]
            bbox = ann['bbox']  # [x_min, y_min, width, height]
            
            # Convert to YOLO format
            x_center = bbox[0] + bbox[2] / 2
            y_center = bbox[1] + bbox[3] / 2
            width = bbox[2]
            height = bbox[3]

            # Normalize coordinates
            x_center /= image_info['width']
            y_center /= image_info['height']
            width /= image_info['width']
            height /= image_info['height']

            # Write YOLO format: <class_id> <x_center> <y_center> <width> <height>
            label_file.write(f"{category_index} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

print("Processing complete.")
print(f"Images with no labels: {no_label_count}")
print(f"Images with person: {person_count}")
print(f"Images with other categories: {rest_count}")

Classes saved to classes.txt
Processing complete.
Images with no labels: 1021
Images with person: 10000
Images with other categories: 40000
