The following notebook walks through downloading an object detection dataset to volumes and training a basic TensorFlow convolutional network to perform object detection.

It is designed to run on Databricks Runtime (DBR) 15.4 ML.

Let's use Hugging Face Datasets to get the CPPE-5 dataset.

In [0]:
from datasets import load_dataset
# Load the CPPE-5 dataset through Hugging Face Datasets; its 'train' split is
# what gets iterated and exported to disk further down.
cppe5 = load_dataset("cppe-5")

This takes ~15 minutes to read, parse, and write out an annotated dataset with raw images and JSON annotations.

In [0]:
import os
import json
from PIL import Image
import io

# Output layout: images and annotations live in sibling folders under one root
base_path = "/Volumes/shm/default/cppe5"
image_path, annotation_path = (
    os.path.join(base_path, leaf) for leaf in ("images", "annotations")
)

# Make sure both output folders are present before anything is written to them
for _out_dir in (image_path, annotation_path):
    os.makedirs(_out_dir, exist_ok=True)

def save_example(example, idx):
    """Write one dataset example to disk as a PNG image plus a JSON annotation.

    Args:
        example: Mapping providing the keys 'image', 'image_id', 'width',
            'height' and 'objects'. The 'image' value must expose a
            ``save(path, optimize=True)`` method (presumably a PIL image --
            confirm against the dataset).
        idx: Position of the example in the split; used only to name the
            annotation file.

    The image file is named after the example's image id, while the
    annotation file is named after ``idx``; the annotation's "file_name"
    field is what ties the two together.
    """
    # The image goes first, so a failed image write leaves no annotation behind
    picture = example['image']
    image_filename = f"image_{int(example['image_id']):04d}.png"
    image_target = os.path.join(image_path, image_filename)
    picture.save(image_target, optimize=True)

    # Metadata stored alongside the image
    record = {
        "image_id": example['image_id'],
        "file_name": image_filename,
        "width": example['width'],
        "height": example['height'],
        "objects": example['objects']
    }

    # One JSON file per example, keyed by the example's position in the split
    annotation_target = os.path.join(annotation_path, f"annotation_{idx}.json")
    with open(annotation_target, 'w') as out_file:
        json.dump(record, out_file)

# Process and save each example.
# The export is best-effort: a single bad example must not abort the run,
# but every skipped example is reported instead of being silently dropped.
saves = 0
failures = 0
for idx, example in enumerate(cppe5['train']):
    try:
        save_example(example, idx)
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must still be able to stop the cell.
        failures += 1
        print(f"Skipping example {idx}: {exc!r}")
        continue
    saves += 1
    # Report the number actually saved, not the number of examples visited
    if saves % 25 == 0:
        print(f"Saved {saves} examples")

print(f"Finished: saved {saves} examples, skipped {failures}")