In [None]:
# Paper: "You Only Look Once: Unified, Real-Time Object Detection"
# Link: https://arxiv.org/abs/1506.02640

loading annotations into memory...
Done (t=15.19s)
creating index...
index created!


## Prerequisites
- Basic understanding of deep learning


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import os
from os.path import join, isdir, isfile
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
from pycocotools.coco import COCO

# Dataset locations. Point DATA_FOLDER at the directory that holds the
# COCO image folder and the unpacked annotation archive.
DATA_FOLDER = "../../Datasets/COCO"

# Folder with the training images.
img_path = os.path.join(DATA_FOLDER, "train2017")

# Instance-annotation JSON for the same training split.
ann_path = os.path.join(
    DATA_FOLDER,
    "annotations_trainval2017",
    "annotations",
    "instances_train2017.json",
)

class CocoDetectionDataset(Dataset):
    """Map-style dataset pairing COCO images with detection targets.

    Indexing returns ``(image, target)`` where ``image`` is the RGB PIL image
    (or whatever ``transforms`` turns it into) and ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory containing the image files referenced by ``ann_file``.
        ann_file: Path to a COCO-format annotation JSON file.
        transforms: Optional callable invoked as ``transforms(image, target)``
            that must return the transformed ``(image, target)`` pair.
    """

    def __init__(self, root, ann_file, transforms=None):
        self.root = root
        self.coco = COCO(ann_file)
        # Sorted image IDs give a stable index -> image mapping.
        self.ids = sorted(self.coco.imgs.keys())
        self.transforms = transforms

        # Category lookup tables (ID <-> name).
        cat_ids = self.coco.getCatIds()
        categories = self.coco.loadCats(cat_ids)
        self.id_to_name = {cat['id']: cat['name'] for cat in categories}
        self.name_to_id = {cat['name']: cat['id'] for cat in categories}
        # Map the dataset's category IDs onto 0..K-1 (in sorted order) so that
        # labels are contiguous and zero-based.
        self.contiguous_category_id_map = {
            old_id: new_id for new_id, old_id in enumerate(sorted(cat_ids))
        }

    def __getitem__(self, index):
        """Load the image at ``index`` and build its detection target.

        Returns:
            tuple: ``(image, target)``. ``target`` holds
                - ``boxes``: float32 tensor of shape ``(N, 4)`` in
                  ``[xmin, ymin, xmax, ymax]`` order (``(0, 4)`` when the
                  image has no non-crowd annotations),
                - ``labels``: int64 tensor of shape ``(N,)`` with contiguous
                  category indices,
                - ``image_id``: tensor containing the image ID,
                - ``area``: float32 tensor of shape ``(N,)``,
                - ``iscrowd``: int64 zeros of shape ``(N,)``.
        """
        img_id = self.ids[index]

        # Image metadata gives the file name relative to ``root``.
        img_info = self.coco.loadImgs(img_id)[0]
        path = img_info['file_name']
        image = Image.open(os.path.join(self.root, path)).convert('RGB')

        # iscrowd=None fetches crowd and non-crowd annotation IDs alike;
        # crowd regions are then dropped explicitly.
        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        annotations = [ann for ann in self.coco.loadAnns(ann_ids) if ann['iscrowd'] == 0]

        boxes = []
        labels = []
        areas = []
        for ann in annotations:
            # The bbox is stored as [x, y, width, height]; convert it to
            # corner format [xmin, ymin, xmax, ymax].
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(self.contiguous_category_id_map[ann['category_id']])
            areas.append(ann['area'])

        # reshape(-1, 4) keeps the box tensor two-dimensional even when the
        # image has no annotations; an empty list would otherwise become a
        # tensor of shape (0,), which code expecting (N, 4) boxes cannot use.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = torch.as_tensor(areas, dtype=torch.float32)

        target = {
            "boxes": boxes,
            "labels": labels,
            # Image ID is kept so predictions can be matched back to the image.
            "image_id": torch.tensor([img_id]),
            "area": areas,
            # Crowd annotations were filtered out above, so every entry is 0.
            "iscrowd": torch.zeros((len(annotations),), dtype=torch.int64),
        }

        if self.transforms:
            # The transform must accept and return both image and target so
            # geometric changes stay in sync with the boxes.
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.ids)


: 

In [None]:
# Peek at the first 11 samples (indices 0 through 10). Because range(11) is the
# first argument to zip, iteration stops before a 12th sample is fetched.
for idx, (img, label) in zip(range(11), coco_dataset):
    print(img.size, label)

(640, 480) [{'segmentation': [[500.49, 473.53, 599.73, 419.6, 612.67, 375.37, 608.36, 354.88, 528.54, 269.66, 457.35, 201.71, 420.67, 187.69, 389.39, 192.0, 19.42, 360.27, 1.08, 389.39, 2.16, 427.15, 20.49, 473.53]], 'area': 120057.13925, 'iscrowd': 0, 'image_id': 9, 'bbox': [1.08, 187.69, 611.59, 285.84], 'category_id': 51, 'id': 1038967}, {'segmentation': [[357.03, 69.03, 311.73, 15.1, 550.11, 4.31, 631.01, 62.56, 629.93, 88.45, 595.42, 185.53, 513.44, 230.83, 488.63, 232.99, 437.93, 190.92, 429.3, 189.84, 434.7, 148.85, 410.97, 121.89, 359.19, 74.43, 358.11, 65.8]], 'area': 44434.751099999994, 'iscrowd': 0, 'image_id': 9, 'bbox': [311.73, 4.31, 319.28, 228.68], 'category_id': 51, 'id': 1039564}, {'segmentation': [[249.6, 348.99, 267.67, 311.72, 291.39, 294.78, 304.94, 294.78, 326.4, 283.48, 345.6, 273.32, 368.19, 269.93, 385.13, 268.8, 388.52, 257.51, 393.04, 250.73, 407.72, 240.56, 425.79, 230.4, 441.6, 229.27, 447.25, 237.18, 447.25, 256.38, 456.28, 254.12, 475.48, 263.15, 486.78,

In [1]:
import math

import latexify


# Quadratic formula: the "+" root of a*x**2 + b*x + c = 0.
# `math` must be imported in this notebook for the body to run when
# solve(...) is actually called; without it the call raises NameError.
@latexify.expression
def solve(a, b, c):
  return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)

# Bare last expression so the notebook displays the decorated function.
solve


<latexify.ipython_wrappers.LatexifiedFunction at 0x7ea4e432f550>