# How to train an Object Detector with your own COCO dataset in PyTorch (Common Objects in Context format)

## Understanding the Dataset & DataLoader in PyTorch

- [Link to Medium post by Takashi Nakamura, PhD](https://medium.com/fullstackai/how-to-train-an-object-detector-with-your-own-coco-dataset-in-pytorch-319e7090da5)
- [Create COCO Annotations From Scratch](https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch)
- []()

The dataset was annotated using [AnyLabeling](https://anylabeling.nrl.ai).

[COCO JSON annotations to YOLO txt format converter](https://github.com/enekuie/COCO-json-annotations-to-YOLO-txt-format-converter)

[Object Detection: COCO and YOLO formats, and conversion between them](https://freedium.cfd/https://towardsdatascience.com/object-detection-coco-and-yolo-formats-and-conversion-between-them-0e0638f4ffc1)


In [2]:
import os
import torch
import torchvision
import  torch.utils.data
from PIL import Image
from pycocotools.coco import COCO

In [3]:
print(f"PyTorch Version: {torch.__version__}")
# Check PyTorch has access to MPS (Metal Performance Shaders, Apple's GPU backend)
print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Pick the compute device, preferring Apple MPS, then CUDA, then CPU.
# torch.backends.mps.is_available() is the same check that is printed above;
# NOTE(review): torch.mps.is_available() appears to be missing from some
# PyTorch releases, so the torch.backends variant is used for both checks.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

PyTorch Version: 2.5.1
Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


`dogBreedDataset`: a Dataset class that returns images together with their labels

In [2]:
class dogBreedDataset(torch.utils.data.Dataset):
    """Dog-breed image dataset with optional COCO annotations.

    The dataset works in one of two modes, chosen by ``annotation``:

    * ``annotation`` given: samples are indexed by the image ids found in the
      COCO file and ``__getitem__`` returns ``(image, target)`` where
      ``target`` is a dict with ``boxes``, ``labels``, ``image_id``, ``area``
      and ``iscrowd`` tensors.
    * ``annotation`` omitted: samples are indexed by ``filenames`` and
      ``__getitem__`` returns ``(image, label)`` with ``label`` taken from
      ``labels`` at the same position.

    Args:
        root: Directory that contains the image files.
        filenames: List of image file names (used when no annotation file
            is given).
        labels: List of labels, parallel to ``filenames``.
        transforms: Optional callable applied to the image only.
        annotation: Optional path to a COCO-format JSON annotation file.
    """

    def __init__(self, root, filenames, labels, transforms=None, annotation=None):
        # the data directory
        self.root = root
        # list of filenames
        self.filenames = filenames
        # list of labels
        self.labels = labels
        self.transforms = transforms
        # The COCO index is only built when an annotation file is supplied.
        self.coco = COCO(annotation) if annotation is not None else None
        # Sorted image ids give a stable index -> image mapping.
        self.ids = sorted(self.coco.imgs.keys()) if self.coco is not None else []

    def _load_image(self, file_name):
        """Open ``file_name`` under ``self.root`` and return it as an RGB image."""
        # convert() forces the pixel data to be read, so the file handle can
        # be closed right away; it also gives every sample three channels.
        with Image.open(os.path.join(self.root, file_name)) as handle:
            return handle.convert("RGB")

    @staticmethod
    def _build_target(img_id, coco_annotation):
        """Turn the COCO annotation dicts of one image into a tensor target.

        Args:
            img_id: COCO id of the image.
            coco_annotation: List of annotation dicts, each with at least
                ``bbox`` (``[xmin, ymin, width, height]``) and ``category_id``.

        Returns:
            Dict with ``boxes`` (N x 4, ``[xmin, ymin, xmax, ymax]``),
            ``labels`` (N), ``image_id`` (1), ``area`` (N) and ``iscrowd`` (N).
        """
        boxes = []
        classes = []
        areas = []
        crowd_flags = []
        for ann in coco_annotation:
            # In COCO format, bbox = [xmin, ymin, width, height];
            # the target stores corners as [xmin, ymin, xmax, ymax].
            xmin, ymin, width, height = ann["bbox"]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            # classes (dog breeds)
            classes.append(ann["category_id"])
            # Fall back to the box area / "not crowd" when the optional
            # keys are absent from the annotation.
            areas.append(ann.get("area", width * height))
            crowd_flags.append(ann.get("iscrowd", 0))

        return {
            # reshape keeps an (0, 4) shape for images without any object
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.as_tensor(classes, dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.as_tensor(crowd_flags, dtype=torch.int64),
        }

    # obtain sample from index
    def __getitem__(self, index):
        """Return the sample at ``index`` as ``(image, target_or_label)``."""
        if self.coco is None:
            # No annotation file: plain (image, label) pairs.
            img = self._load_image(self.filenames[index])
            target = self.labels[index]
        else:
            coco = self.coco
            # Image ID
            img_id = self.ids[index]
            # Annotation dicts that belong to this image
            coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
            # path to the image file, relative to root
            path = coco.loadImgs(img_id)[0]["file_name"]
            img = self._load_image(path)
            target = self._build_target(img_id, coco_annotation)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    # return the length of the dataset
    def __len__(self):
        """Return the number of samples that ``__getitem__`` can index."""
        if self.coco is None:
            return len(self.filenames)
        return len(self.ids)

In [None]:
# directory that holds the image files
root = "../data/renamed_dub_removed"

# three sample jpg file names, numbered 0001 to 0003
filenames = [f"american_pit_bull_terrier_{n:04d}.jpg" for n in range(1, 4)]

# one integer class label per file name, in the same order
labels = [0, 1, 1]

# build the dataset over the sample files
my_dataset = dogBreedDataset(root=root, filenames=filenames, labels=labels)
