In [64]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import xml.etree.ElementTree as ET
import json

In [7]:
coin_labels = ('10c', '20c', '50c', '$1', '$2', '$5', '$10')
label_map = {k: v + 1 for v, k in enumerate(coin_labels)}
label_map['background'] = 0

In [47]:
def parse_annotation(annotation_path):
    tree = ET.parse(annotation_path)
    root = tree.getroot()
    boxes = list()
    labels = list()
    # difficulties = list()
    for object in root.iter('object'):

        # difficult = int(object.find('difficult').text == '1')

        label = object.find('name').text.lower().strip()
        if label not in label_map:
            continue

        bbox = object.find('bndbox')
        # TODO: why minus 1?
        xmin = int(bbox.find('xmin').text) - 1
        ymin = int(bbox.find('ymin').text) - 1
        xmax = int(bbox.find('xmax').text) - 1
        ymax = int(bbox.find('ymax').text) - 1

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
        # difficulties.append(difficult)

    return {'boxes': boxes, 'labels': labels}#, 'difficulties': difficulties}ies}

In [67]:
from glob import glob
from pathlib import Path

ch_files = glob(r'dataset\ch dataset\*.xml')
jy_files = glob(r'dataset\joey dataset\*.xml')
#hw_files = glob(r'dataset\hw dataset\*.xml')

# save as 2 separate json objects, one for images, one for bounding box values and objects
def xml_to_json(files):
    images_list = [] 
    objects_list = [] # file: boxes: [], labels: []
    image_f = open("TRAIN_images.json", 'w')
    objects_f = open("TRAIN_objects.json", 'w')
    for file in files:
        file_name = Path(file).stem
        file_path = os.path.splitext(file)[0]
        xml_dict = parse_annotation(file)
        objects_list.append(xml_dict)
        images_list.append(file_path + ".jpg")

    json.dump(images_list, image_f)
    json.dump(objects_list, objects_f)

xml_to_json(ch_files)
# xml_to_json(jy_files)

In [68]:
class PascalVOCDataset(Dataset):
    """
    A PyTorch Dataset class to be used in a PyTorch DataLoader to create batches.
    """

    def __init__(self, data_folder, split):
        """
        :param data_folder: folder where data files are stored
        :param split: split, one of 'TRAIN' or 'TEST'
        """
        self.split = split.upper()

        assert self.split in {'TRAIN', 'TEST'}

        self.data_folder = data_folder

        # Read data files
        with open(os.path.join(data_folder, self.split + '_images.json'), 'r') as j:
            self.images = json.load(j)
        with open(os.path.join(data_folder, self.split + '_objects.json'), 'r') as j:
            self.objects = json.load(j)

        assert len(self.images) == len(self.objects)

    def __getitem__(self, i):
        # Read image
        image = Image.open(self.images[i], mode='r')
        image = image.convert('RGB')

        # Read objects in this image (bounding boxes, labels)
        objects = self.objects[i]
        boxes = torch.FloatTensor(objects['boxes'])  # (n_objects, 4)
        labels = torch.LongTensor(objects['labels'])  # (n_objects)

        # Apply transformations
        # image, boxes, labels, difficulties = transform(image, boxes, labels, split=self.split)

        return image, boxes, labels

    def __len__(self):
        return len(self.images)

    def collate_fn(self, batch):
        """
        Since each image may have a different number of objects, we need a collate function (to be passed to the DataLoader).
        This describes how to combine these tensors of different sizes. We use lists.
        Note: this need not be defined in this Class, can be standalone.
        :param batch: an iterable of N sets from __getitem__()
        :return: a tensor of images, lists of varying-size tensors of bounding boxes, labels, and difficulties
        """

        images = list()
        boxes = list()
        labels = list()

        for b in batch:
            images.append(b[0])
            boxes.append(b[1])
            labels.append(b[2])

        images = torch.stack(images, dim=0)

        return images, boxes, labels  # tensor (N, 3, 300, 300), 3 lists of N tensors each

In [69]:
dataset = PascalVOCDataset(".", "TRAIN")
train_dataloader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn = dataset.collate_fn)
for data in train_dataloader:
    print(data)

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>