In [1]:
import os
from pycocotools.coco import COCO
import tensorflow as tf
import numpy as np

annotation_path = './annotations/instances_train2017.json'
image_dir = 'path/to/train2017'

coco = COCO(annotation_path)

# Resolve the category ids by name instead of hard-coding them: in COCO,
# 'person' is id 1 but 'tv' is id 72 (62 is 'chair'), so a hand-written
# [1, 62] list does not match the ids found on the annotations.
cat_ids = coco.getCatIds(catNms=['person', 'tv'])
assert len(cat_ids) == 2, f"expected 2 category ids for person/tv, got {cat_ids}"
target_classes = list(cat_ids)  # person and tv/monitor classes

# getImgIds() with several catIds returns only images containing *all* of
# them. Query each category on its own and take the union so images with
# just a person or just a tv are kept; sorting keeps the order reproducible.
img_ids = sorted(
    {img_id for cat_id in cat_ids for img_id in coco.getImgIds(catIds=[cat_id])}
)
images = coco.loadImgs(img_ids)

loading annotations into memory...


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/annotations/instances_train2017.json'

In [None]:
import keras_cv
from tensorflow import keras

# Number of object categories the detector predicts. The value below counts
# only the two foreground classes; it does not reserve a slot for background.
num_classes = 2  # person and tv/monitor
# Build a RetinaNet on a ResNet-50 backbone; boxes are exchanged in "xywh" format.
retinanet = keras_cv.models.RetinaNet(
    classes=num_classes,
    backbone="resnet50",
    bounding_box_format="xywh",
)
# Losses and optimizer are selected by their string names.
# NOTE(review): these keyword names depend on the installed KerasCV version — confirm.
retinanet.compile(
    classification_loss="focal", 
    box_loss="smoothl1", 
    optimizer="adam",
)

In [None]:
def load_image_and_boxes(img_id):
    """Load one COCO image and its person/tv boxes, resized to 640x640.

    Args:
        img_id: COCO image id to load.

    Returns:
        A tuple ``(image, targets)``. ``image`` is a float32 tensor of shape
        (640, 640, 3). ``targets`` is a dict with ``"boxes"``, a float32
        array of shape (N, 4) holding ``[x, y, width, height]`` in pixels of
        the resized image, and ``"classes"``, an int32 array of shape (N,)
        holding the zero-based position of each category in ``cat_ids``.
        N may be 0 when the image has no non-crowd annotations.
    """
    target_size = 640  # side length expected by the dataset signature

    # Load image
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(image_dir, img_info['file_name'])
    # channels=3 forces RGB output; some COCO JPEGs are grayscale and would
    # otherwise decode to a single channel.
    image = tf.image.decode_jpeg(tf.io.read_file(img_path), channels=3)
    image = tf.image.resize(image, (target_size, target_size))

    # Load annotations
    ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids, iscrowd=False)
    annotations = coco.loadAnns(ann_ids)

    # reshape(-1, 4) keeps the (N, 4) shape even when there are no annotations.
    boxes = np.array(
        [ann['bbox'] for ann in annotations], dtype=np.float32
    ).reshape(-1, 4)

    # COCO boxes are in pixels of the original image; rescale them by the
    # same per-axis factors the resize applied to the image.
    scale_x = target_size / img_info['width']
    scale_y = target_size / img_info['height']
    boxes = boxes * np.array([scale_x, scale_y, scale_x, scale_y], dtype=np.float32)

    # Zero-based class index taken from cat_ids (the ids used to select the
    # annotations above), so every category_id is found and the labels stay
    # inside [0, number of categories).
    labels = np.array(
        [cat_ids.index(ann['category_id']) for ann in annotations],
        dtype=np.int32,
    )

    return image, {"boxes": boxes, "classes": labels}

def coco_generator(img_ids):
    """Lazily yield the ``load_image_and_boxes`` result for each id in ``img_ids``."""
    yield from (load_image_and_boxes(image_id) for image_id in img_ids)

# Create TensorFlow dataset
dataset = tf.data.Dataset.from_generator(
    lambda: coco_generator(img_ids),
    output_signature=(
        tf.TensorSpec(shape=(640, 640, 3), dtype=tf.float32),
        {
            "boxes": tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
            "classes": tf.TensorSpec(shape=(None,), dtype=tf.int32),
        },
    ),
)
# Images carry different numbers of boxes, so a plain .batch() cannot stack
# them. Pad boxes/classes to the longest example in each batch instead.
# NOTE(review): -1 is used as the padding marker for boxes and classes —
# confirm that the installed KerasCV version treats -1 as "no box".
dataset = dataset.padded_batch(
    4,
    padded_shapes=(
        (640, 640, 3),
        {"boxes": (None, 4), "classes": (None,)},
    ),
    padding_values=(
        tf.constant(0.0, dtype=tf.float32),
        {
            "boxes": tf.constant(-1.0, dtype=tf.float32),
            "classes": tf.constant(-1, dtype=tf.int32),
        },
    ),
).prefetch(2)

In [None]:
# Train the detector for 10 epochs on `dataset`, the batched tf.data pipeline
# built from the COCO generator.
retinanet.fit(dataset, epochs=10)

In [None]:
# NOTE(review): this pipeline reads `img_ids`, the same ids the training
# dataset uses, so the score below is not a held-out metric. Point it at ids
# from a validation annotation file to get a real validation number.
val_dataset = tf.data.Dataset.from_generator(
    lambda: coco_generator(img_ids),
    output_signature=(
        tf.TensorSpec(shape=(640, 640, 3), dtype=tf.float32),
        {
            "boxes": tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
            "classes": tf.TensorSpec(shape=(None,), dtype=tf.int32),
        },
    ),
)  # validation data
# evaluate() needs batched input; pad the variable-length boxes/classes so
# examples with different box counts can share a batch.
# NOTE(review): -1 is used as the padding marker — confirm that the installed
# KerasCV version treats -1 as "no box".
val_dataset = val_dataset.padded_batch(
    4,
    padded_shapes=(
        (640, 640, 3),
        {"boxes": (None, 4), "classes": (None,)},
    ),
    padding_values=(
        tf.constant(0.0, dtype=tf.float32),
        {
            "boxes": tf.constant(-1.0, dtype=tf.float32),
            "classes": tf.constant(-1, dtype=tf.int32),
        },
    ),
).prefetch(2)
retinanet.evaluate(val_dataset)

In [None]:
def predict(image_path):
    """Run the trained RetinaNet on a single JPEG file.

    Args:
        image_path: Path to a JPEG image on disk.

    Returns:
        Whatever ``retinanet.predict`` returns for a batch holding this one
        image. The image is resized to 640x640 before inference, so any box
        coordinates presumably refer to the resized image, not the original
        file — verify before drawing them on the source image.
    """
    # channels=3 forces RGB so grayscale JPEGs still give a 3-channel input.
    image = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)
    image = tf.image.resize(image, (640, 640))
    # Add the leading batch dimension the model expects.
    image = tf.expand_dims(image, axis=0)

    # Predict bounding boxes and classes
    predictions = retinanet.predict(image)
    return predictions