## Imports

In [1]:
import torchvision
import os

from torchvision.datasets import CocoDetection
from transformers import DetrImageProcessor
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
class SeatbeltCocoDataset(CocoDetection):
    
    def __init__(self, img_folder, processor):
        ann_file = os.path.join(img_folder, "_annotations.coco.json")
        super(SeatbeltCocoDataset, self).__init__(img_folder, ann_file)
        self.processor = processor

    def __getitem__(self, idx):
        # read in PIL image and target in COCO format
        # feel free to add data augmentation here before passing them to the next step
        img, target = super(SeatbeltCocoDataset, self).__getitem__(idx)
        print(target)
        
        # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.processor(images=img, annotations=target, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension
        target = encoding["labels"][0] # remove batch dimension

        return pixel_values, target

In [14]:
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

train_dataset = SeatbeltCocoDataset(img_folder='./data/train', processor=processor)
val_dataset = SeatbeltCocoDataset(img_folder='./data//valid', processor=processor)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [15]:
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))

Number of training examples: 564
Number of validation examples: 161


In [16]:
print(train_dataset[1])

[{'id': 1, 'image_id': 1, 'category_id': 2, 'bbox': [297, 245, 300, 321], 'area': 96300, 'segmentation': [], 'iscrowd': 0}]
(tensor([[[ 2.0777,  2.0263,  2.0948,  ..., -2.0494, -2.1008, -2.0665],
         [-0.8164,  1.2728,  2.0948,  ..., -2.0152, -2.0323, -2.1008],
         [-0.1999,  1.4098,  2.0948,  ..., -2.0494, -2.0494, -2.1008],
         ...,
         [-1.9980, -2.0665, -2.1008,  ...,  2.1633,  2.1119,  2.1633],
         [-2.1008, -2.1008, -2.1008,  ...,  2.1290,  2.0777,  2.2489],
         [-2.0323, -2.0837, -2.1008,  ...,  2.2318,  2.2147,  2.2318]],

        [[ 2.1835,  2.1310,  2.2010,  ..., -1.9307, -2.0007, -1.9482],
         [-0.7752,  1.3957,  2.2185,  ..., -1.9132, -1.9307, -2.0007],
         [-0.1275,  1.5182,  2.2360,  ..., -1.9657, -1.9657, -2.0007],
         ...,
         [-1.9132, -1.9832, -2.0182,  ...,  2.3410,  2.2885,  2.3410],
         [-2.0182, -2.0182, -2.0182,  ...,  2.3060,  2.2535,  2.4286],
         [-1.9482, -2.0007, -2.0182,  ...,  2.4111,  2.3936,  2.

In [10]:
pixel_values, target = train_dataset[0]

In [11]:
pixel_values.shape

torch.Size([3, 800, 800])

In [12]:
print(target)

{'size': tensor([800, 800]), 'image_id': tensor([0]), 'class_labels': tensor([1]), 'boxes': tensor([[0.3109, 0.5367, 0.3750, 0.3734]]), 'area': tensor([89625.]), 'iscrowd': tensor([0]), 'orig_size': tensor([640, 640])}


In [8]:
def collate_fn(batch):
  
  pixel_values = [item[0] for item in batch]
  encoding = processor.pad(pixel_values, return_tensors="pt")
  labels = [item[1] for item in batch]
  batch = {}
  batch['pixel_values'] = encoding['pixel_values']
  batch['pixel_mask'] = encoding['pixel_mask']
  batch['labels'] = labels
  
  return batch

train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=4, shuffle=True)
val_dataloader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=2)
print(len(val_dataloader))
batch = next(iter(train_dataloader))

81


In [17]:
batch.keys()

dict_keys(['pixel_values', 'pixel_mask', 'labels'])

In [18]:
print(target)

{'size': tensor([800, 800]), 'image_id': tensor([0]), 'class_labels': tensor([1]), 'boxes': tensor([[0.3109, 0.5367, 0.3750, 0.3734]]), 'area': tensor([89625.]), 'iscrowd': tensor([0]), 'orig_size': tensor([640, 640])}
