In [1]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO

In [2]:
class myOwnDataset(torch.utils.data.Dataset):
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        # Own coco file
        coco = self.coco
        # Image ID
        img_id = self.ids[index]
        # List: get annotation id from coco
        ann_ids = coco.getAnnIds(imgIds=img_id)
        # Dictionary: target coco_annotation file for an image
        coco_annotation = coco.loadAnns(ann_ids)
        # path for input image
        path = coco.loadImgs(img_id)[0]['file_name']
        # open the input image
        img = Image.open(os.path.join(self.root, path))

        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]['bbox'][0]
            ymin = coco_annotation[i]['bbox'][1]
            xmax = xmin + coco_annotation[i]['bbox'][2]
            ymax = ymin + coco_annotation[i]['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Labels (In my case, I only one class: target class or background)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        # Tensorise img_id
        img_id = torch.tensor([img_id])
        # Size of bbox (Rectangular)
        areas = []
        for i in range(num_objs):
            areas.append(coco_annotation[i]['area'])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        # Iscrowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Annotation is in dictionary format
        my_annotation = {}
        my_annotation["boxes"] = boxes
        my_annotation["labels"] = labels
        my_annotation["image_id"] = img_id
        my_annotation["area"] = areas
        my_annotation["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        return len(self.ids)

In [20]:
# path to your own data and coco file
train_data_dir = './frames/'
train_coco = './merged_coco_annotation/merged_coco.json'

# create own Dataset
my_dataset = myOwnDataset(root=train_data_dir,
                          annotation=train_coco,
                          )

# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

# Batch size
train_batch_size = 4

# own DataLoader
# solve the issue of 
data_loader = torch.utils.data.DataLoader(my_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=0,
                                          collate_fn=collate_fn)

loading annotations into memory...
Done (t=0.50s)
creating index...
index created!


In [21]:
my_dataset.__getitem__(0)

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3840x2160 at 0x7F86916F72E0>,
 {'boxes': tensor([[1626.5850,  148.1710, 1695.7321,  250.2440],
          [2011.8290,  701.3410, 2074.3899,  823.1710],
          [1687.6130,  137.0280, 1691.2190,  147.8460]]),
  'labels': tensor([1, 1, 1]),
  'image_id': tensor([0]),
  'area': tensor([7058.0415, 7621.8066,   39.0097]),
  'iscrowd': tensor([0, 0, 0])})

In [22]:
# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# # DataLoader is iterable over Dataset
# for imgs, annotations in data_loader:
#     imgs = list(img.to(device) for img in imgs)
#     annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
#     print(annotations)

train_features, train_labels = next(iter(data_loader))
print(train_features)
print(train_labels)


cpu
(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3840x2160 at 0x7F86916DC790>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3840x2160 at 0x7F8660BE4430>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3840x2160 at 0x7F8660BE47C0>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3840x2160 at 0x7F8660BE4130>)
({'boxes': tensor([[2122.2710,  801.7470, 2178.8650,  914.9340],
        [1235.6331, 1329.9561, 1301.6591, 1433.7120]]), 'labels': tensor([1, 1]), 'image_id': tensor([7775]), 'area': tensor([6405.7051, 6850.5938]), 'iscrowd': tensor([0, 0])}, {'boxes': tensor([[2188.8000,  182.4000, 2371.2000,  441.6000],
        [2323.2000,  604.8000, 2496.0000,  873.6000],
        [3427.2000, 1555.2000, 3600.0000, 1680.0000]]), 'labels': tensor([1, 1, 1]), 'image_id': tensor([7936]), 'area': tensor([47278.0781, 46448.6406, 21565.4395]), 'iscrowd': tensor([0, 0, 0])}, {'boxes': tensor([[ 518.1080, 1090.9460,  795.4050, 1298.9189],
        [1685.6760, 1072.7030, 1