# PASCAL VOC DataLoader & Bounding Boxes

## 1. Prepare PASCAL VOC2007 dataset.

In [None]:
## link: https://drive.google.com/file/d/1FSPPvm6-QZ43pCYzPA3-pyN7RZ0rq5DT/view?usp=sharing
## id: 1FSPPvm6-QZ43pCYzPA3-pyN7RZ0rq5DT
## filename: VOC2007.zip

!wget --load-cookies ~/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies ~/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1FSPPvm6-QZ43pCYzPA3-pyN7RZ0rq5DT' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1FSPPvm6-QZ43pCYzPA3-pyN7RZ0rq5DT" -O VOC2007.zip && rm -rf ~/cookies.txt

In [None]:
import zipfile
# Unpack the downloaded archive into the current working directory.
with zipfile.ZipFile('VOC2007.zip', mode='r') as archive:
    archive.extractall('./')

!rm -rf VOC2007.zip
!rm -rf __MACOSX

## 2. Import packages.

In [None]:
import os

import numpy as np
import scipy.io as sio
import pickle
import torch
import xml.etree.ElementTree as ET
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision import transforms

In [None]:
# Class names used for PASCAL VOC2007 in this notebook; index 0 is a
# background placeholder. A name's position in this tuple is what the drawing
# helpers use to look up its color in VOC2007_COLORS.
VOC2007_CLASSES = (
    '__background__',
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor'
)

# Bounding-box colors: one entry per element of VOC2007_CLASSES (same order),
# each given as three float components in [0, 1]. The entries are passed as
# the `edgecolor` of the rectangles drawn around objects.
VOC2007_COLORS = [
    [0.,        0.,        0.       ],
 [0.5019608, 0.,        0.       ],
 [0.,        0.5019608, 0.       ],
 [0.5019608, 0.5019608, 0.       ],
 [0.,        0.,        0.5019608],
 [0.5019608, 0.,        0.5019608],
 [0.,        0.5019608, 0.5019608],
 [0.5019608, 0.5019608, 0.5019608],
 [0.2509804, 0.,        0.       ],
 [0.7529412, 0.,        0.       ],
 [0.2509804, 0.5019608, 0.       ],
 [0.7529412, 0.5019608, 0.       ],
 [0.2509804, 0.,        0.5019608],
 [0.7529412, 0.,        0.5019608],
 [0.2509804, 0.5019608, 0.5019608],
 [0.7529412, 0.5019608, 0.5019608],
 [0.,        0.2509804, 0.       ],
 [0.5019608, 0.2509804, 0.       ],
 [0.,        0.7529412, 0.       ],
 [0.5019608, 0.7529412, 0.       ],
 [0.,        0.2509804, 0.5019608]]

## 3. Load an annotation file (working with an XML file).

### xml file example
- Open any XML annotation file in a web browser to see its structure.

- The output of `__getitem__` should be:

>image : a PIL Image of size (H, W)  
>target : a dict containing the following key  
>>boxes (FloatTensor[N, 4]) :  the coordinates of the N bounding boxes in [x0, y0, x1, y1] format, ranging from 0 to W and 0 to H  
>>labels (Int64Tensor[N]) : the label for each bounding box  
>>image_id (Int64Tensor[1]): an image identifier. It should be unique between all the images in the dataset, and is used during evaluation  
>>area (Tensor[N]): The area of the bounding box. This is used during evaluation with the COCO metric, to separate the metric scores between small, medium and large boxes.  
>>iscrowd (UInt8Tensor[N]): instances with iscrowd=True will be ignored during evaluation (all set to zero here)  

In [None]:
### Parse the annotation XML of one sample image
sample_idx = 1
filename = './VOC2007/Annotations/{:06d}.xml'.format(sample_idx)
tree = ET.parse(filename)

In [None]:
### Explore the XML tree one level at a time
root = tree.getroot()
print("root.tag:", root.tag)
for node in root:  # direct children of the root element
    print(">>>>", node.tag)
print('\n')

# After the loop above, `node` still refers to the last child of the root,
# so this section walks that element's children.
print("node.tag:", node.tag)
for node2 in node:  ### iterate over the children of the final object
    print(f">>>> {node2.tag}: {node2.text}")
print("\n")

# Likewise, `node2` is now the last child visited in the previous loop.
print("node2.tag:", node2.tag)
for node3 in node2: ### iterate over the children of the final bndbox (bounding box)
    print(f">>>> {node3.tag}: {node3.text}")

### 3-1. Find all objects

In [None]:
# Collect every <object> element of the annotation and report how many there are.
objs = tree.findall('object')
num_objs = len(objs)
print("the number of objects:", num_objs)
print("objs:", objs) # List[Element]

# For each object, print its direct children and, below each child,
# that child's own children (e.g. the coordinates inside bndbox).
for obj_idx, obj in enumerate(objs):
    for child in obj:
        print(f">>>> {obj_idx}th {child.tag}: {child.text}")
        for grandchild in child:
            print(f">>>> {obj_idx}th bounding box's {grandchild.tag}: {grandchild.text}")
    print("")

### 3-2. Obtain bounding boxes as np.ndarray.

In [None]:
# One row per object: [x1, y1, x2, y2]. The array is int32, so the float
# values parsed from the XML are cast to integers on assignment.
boxes = np.zeros((num_objs, 4), dtype=np.int32)
classes = []
corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
for row, obj in enumerate(objs):
    ### Bounding box: every coordinate is shifted by -1 (correct coordinates)
    bndbox = obj.find('bndbox')
    boxes[row] = [float(bndbox.find(tag).text) - 1 for tag in corner_tags]

    ### Category: the class name, kept as a string
    classes.append(obj.find('name').text)

print(boxes)
print(classes)

In [None]:
# Load the JPEG with the same index as the parsed annotation and display it
# (no bounding boxes yet).
img = Image.open(f'./VOC2007/JPEGImages/{sample_idx:06d}.jpg')
fig, ax = plt.subplots(1)
ax.imshow(img)

In [None]:
def draw_bb_voc(img, boxes, classes, red_only=False):
    """Show `img` with one rectangle outlined per bounding box.

    Args:
        img: image handed to ``ax.imshow``.
        boxes: iterable of boxes indexed as [x0, y0, x1, y1] (corner format).
        classes: class name of each box, paired with `boxes` by position.
        red_only: if true, outline every box in red; otherwise use the
            VOC2007_COLORS entry at the class's index in VOC2007_CLASSES.
    """
    _, axis = plt.subplots(1)
    for box, class_name in zip(boxes, classes):
        left, top = box[0], box[1]  # the upper left point
        width = box[2] - box[0]
        height = box[3] - box[1]
        if red_only:
            color = 'r'
        else:
            color = VOC2007_COLORS[VOC2007_CLASSES.index(class_name)]
        outline = patches.Rectangle(
            (left, top),
            width,
            height,
            linewidth=3,
            edgecolor=color,
            facecolor='none',
        )
        axis.add_patch(outline)
    axis.imshow(img)

draw_bb_voc(img, boxes, classes, True)

## 4. Change into COCO format 

### COCO Json format example

- json is a kind of dict

>root
>>type : "instances", "captions", "person_keypoints"   
>>images
>>>file_name : image_file_name (include type)   
>>>height : image height   
>>>width : image width   
>>>id : image id (the file name without its extension, as an integer)   

>>annotations : a list of dicts, one per bounding box, each containing the following keys
>>>area : The area of the bounding box. This is used during evaluation with the COCO metric, to separate the metric scores between small, medium and large boxes   
>>>iscrowd : 0 or 1; annotations with iscrowd=1 are ignored during evaluation (always 0 here)   
>>>bbox : [x, y, width, height] — the top-left corner of the box followed by its width and height (note: not the [x0, y0, x1, y1] corner format used in section 3)   
>>>category_id : coco category id   
>>>ignore : 0 / 1
>>>segmentation : If exists segmentation id
>>>image_id : an image identifier. It should be unique between all the images in the dataset, and is used during evaluation   
>>>id : unique id of this annotation (a running bounding-box counter starting at 1)   

>>categories :
>>>supercategory : ""   
>>>id :   
>>>name :   

In [None]:
# Make class to class_id labels
import os
import pickle
os.makedirs('./VOC2COCO/annotations', exist_ok=True)

## PASCAL VOC labels (background first, then the 20 object classes)
class_list = [
    'background',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Write the labels to a text file, one label per line (no trailing newline).
class_dict_path = 'VOC2COCO/labels.txt'
with open(class_dict_path, 'w') as label_file:
    label_file.write('\n'.join(class_list))

In [None]:
# from https://github.com/yukkyo/voc2coco/blob/master/voc2coco.py

import os
import argparse
import json
import xml.etree.ElementTree as ET
from typing import Dict, List
from tqdm import tqdm
import re


def get_label2id(labels_path: str) -> Dict[str, int]:
    """Map every label listed in a text file to an integer id, counting from 1.

    The file is split on whitespace, so each whitespace-separated token is one
    label; ids follow the order in which the labels appear in the file.

    Args:
        labels_path: path of the text file holding the labels.

    Returns:
        Dict from label name to its 1-based id.
    """
    with open(labels_path, 'r') as labels_file:
        names = labels_file.read().split()

    return {name: label_id for label_id, name in enumerate(names, start=1)}

def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '') -> List[str]:
    """Build the list of annotation file paths from a file of annotation ids.

    Args:
        ann_dir_path: directory that contains the annotation files.
        ann_ids_path: text file with whitespace-separated annotation ids.
        ext: file extension without the leading dot; an empty string means
            the ids are used as file names unchanged.

    Returns:
        One path per id, in the order the ids appear in `ann_ids_path`.
    """
    suffix = '' if ext == '' else '.' + ext
    with open(ann_ids_path, 'r') as ids_file:
        ids = ids_file.read().split()
    return [os.path.join(ann_dir_path, ann_id + suffix) for ann_id in ids]


def get_image_info(annotation_root):
    """Build the COCO ``images`` entry for one annotation XML.

    Args:
        annotation_root: root element of the parsed annotation; its
            ``filename`` and ``size/width`` / ``size/height`` children are read.

    Returns:
        Dict with ``file_name`` (as stored in the XML), ``height``, ``width``
        and ``id``. The id is the file's base name without its extension,
        converted with ``int`` — so a non-numeric name raises ValueError.
    """
    filename = annotation_root.findtext('filename')
    stem, _ = os.path.splitext(os.path.basename(filename))

    size_node = annotation_root.find('size')
    width = int(size_node.findtext('width'))
    height = int(size_node.findtext('height'))

    return {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': int(stem),
    }


def get_coco_annotation_from_obj(obj, label2id):
    """Convert one VOC ``<object>`` element into a COCO annotation dict.

    Args:
        obj: the ``<object>`` element; its ``name`` and ``bndbox`` children
            are read.
        label2id: mapping from class name to category id.

    Returns:
        Dict with ``area``, ``iscrowd`` (always 0), ``bbox`` as
        [xmin, ymin, width, height], ``category_id`` and an empty
        ``segmentation`` list.

    Raises:
        AssertionError: if the class name is not in `label2id`, or the box
            does not have positive width and height.
    """
    label = obj.findtext('name')
    assert label in label2id, f"Error: {label} is not in label2id !"
    category_id = label2id[label]

    bndbox = obj.find('bndbox')
    xmin, ymin, xmax, ymax = (
        float(bndbox.findtext(tag)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    # Only the top-left corner is shifted by one; xmax / ymax are used as read.
    xmin -= 1
    ymin -= 1
    assert xmax > xmin and ymax > ymin, f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}"

    box_width, box_height = xmax - xmin, ymax - ymin
    return {
        'area': box_width * box_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, box_width, box_height],
        'category_id': category_id,
        'segmentation': [],  # This script is not for segmentation
    }


def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str):
    """Convert VOC annotation XML files into a single COCO-style JSON file.

    Args:
        annotation_paths: paths of the XML files to convert, one per image.
        label2id: mapping from class name to category id.
        output_jsonpath: path of the JSON file to write.
    """
    images = []
    annotations = []
    next_ann_id = 1  # annotation ids are a running counter starting at 1

    print('Start converting !')
    for xml_path in tqdm(annotation_paths):
        # One XML file describes one image and all of its objects.
        xml_root = ET.parse(xml_path).getroot()

        info = get_image_info(annotation_root=xml_root)
        images.append(info)

        for obj in xml_root.findall('object'):
            record = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            record.update({'image_id': info['id'], 'id': next_ann_id})
            annotations.append(record)
            next_ann_id += 1

    categories = [
        {'supercategory': 'none', 'id': label_id, 'name': label}
        for label, label_id in label2id.items()
    ]

    coco = {
        "images": images,
        "type": "instances",
        "annotations": annotations,
        "categories": categories,
    }
    with open(output_jsonpath, 'w') as out_file:
        out_file.write(json.dumps(coco))

In [None]:
def voc2coco(ann_dir, ann_ids, labels, output, ext='xml'):
    """Convert a set of VOC annotations into one COCO JSON file.

    Args:
        ann_dir: directory holding the annotation files.
        ann_ids: text file listing the annotation ids to convert.
        labels: text file listing the class labels.
        output: path of the JSON file to write.
        ext: extension of the annotation files, without the dot.
    """
    # Keyword arguments are evaluated left to right, so the labels file is
    # read before the id list.
    convert_xmls_to_cocojson(
        label2id=get_label2id(labels_path=labels),
        annotation_paths=get_annpaths(
            ann_dir_path=ann_dir,
            ann_ids_path=ann_ids,
            ext=ext,
        ),
        output_jsonpath=output,
    )


In [None]:
# Convert the train and test splits to COCO JSON.
# The label list is read from VOC2COCO/labels.txt, the file this notebook
# writes from `class_list`, so the category ids stored in the JSON agree with
# the ids that the dataset class later maps back to names.
for split in ('train', 'test'):
    voc2coco(ann_dir='VOC2007/Annotations',
             ann_ids=f'VOC2007/ImageSets/Layout/{split}.txt',
             labels='VOC2COCO/labels.txt',
             output=f'VOC2COCO/annotations/instances_{split}2017.json')

In [None]:
import os
import shutil
import json
os.makedirs('./VOC2COCO/train2017', exist_ok=True)
os.makedirs('./VOC2COCO/val2017', exist_ok=True)

# Copy every image listed in the train annotations into train2017.
# The JSON files are opened with `with` so the handles are closed right after
# loading instead of being left open.
with open('VOC2COCO/annotations/instances_train2017.json', 'r') as f:
    train = json.load(f)
for data in train['images']:
    shutil.copy2(f"./VOC2007/JPEGImages/{data['file_name']}", f"./VOC2COCO/train2017/{data['file_name']}")

# The images of the test annotations are stored in val2017.
with open('VOC2COCO/annotations/instances_test2017.json', 'r') as f:
    val = json.load(f)
for data in val['images']:
    shutil.copy2(f"./VOC2007/JPEGImages/{data['file_name']}", f"./VOC2COCO/val2017/{data['file_name']}")

## 5. Define CUSTOM DATALOADER

In [None]:
import os
import json
import torch
from PIL import Image

class_list = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable','dog', 'horse', 'motorbike', 'person', 'pottedplant','sheep', 'sofa', 'train', 'tvmonitor'] 
# Map 1-based category ids to class names ('background' is id 1).
class_dict = dict(enumerate(class_list, start=1))

class PASCAL_DATASET(torch.utils.data.Dataset):
    """Images of a COCO-style folder together with their boxes and class names.

    Each sample is a tuple ``(img, boxes, gt_classes_str)``: the opened image,
    an array with one ``bbox`` row per annotation (as stored in the JSON), and
    the list of class names for those boxes.
    """

    def __init__(self, data_root, image_dir, split='train'):
        """Index the images and load the annotations of one split.

        Args:
            data_root: root folder holding the image folder and ``annotations``.
            image_dir: name of the image folder inside `data_root`.
            split: selects ``annotations/instances_{split}2017.json``.
        """
        self.data_root = data_root
        self.image_dir = os.path.join(data_root, image_dir)
        # os.listdir returns entries in arbitrary order; sort them so that an
        # index always refers to the same image.
        self.img_list = sorted(
            file for file in os.listdir(self.image_dir) if file.endswith('.jpg')
        )
        annotation_path = os.path.join(data_root, 'annotations', f'instances_{split}2017.json')
        self.class_dict = class_dict
        self.boxes, self.gt_classes_str = self._load_annotation(annotation_path)

    def _load_annotation(self, annotation_dir):
        """Group the boxes and class names of an annotation JSON by image id.

        Args:
            annotation_dir: path of the annotation JSON file (despite the
                name, this is a file path, not a directory).

        Returns:
            Two dicts keyed by image id: the list of ``bbox`` entries and the
            list of class names, in the order they appear in the file.
        """
        boxes = {}
        gt_classes_str = {}

        with open(annotation_dir) as f:
            data = json.load(f)

        for box_dict in data['annotations']:
            image_id = box_dict['image_id']
            # Resolve the class name first so an unknown category id fails
            # before anything is stored for this annotation.
            category = self.class_dict[box_dict['category_id']]
            boxes.setdefault(image_id, []).append(box_dict['bbox'])
            gt_classes_str.setdefault(image_id, []).append(category)

        return boxes, gt_classes_str

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_list)

    def _get_targets(self, img_idx):
        """Return (boxes array of shape (N, 4), class names) for an image id.

        An image without any annotation yields an empty (0, 4) array and an
        empty list instead of raising KeyError.
        """
        boxes = np.array(self.boxes.get(img_idx, [])).reshape(-1, 4)
        gt_classes_str = self.gt_classes_str.get(img_idx, [])
        return boxes, gt_classes_str

    def __getitem__(self, index):
        """Return ``(img, boxes, gt_classes_str)`` for the image at `index`."""
        img_path = self.img_list[index]
        img = Image.open(os.path.join(self.image_dir, img_path))
        # The numeric file stem is the image id used in the annotation file.
        img_idx = int(os.path.splitext(img_path)[0])
        boxes, gt_classes_str = self._get_targets(img_idx)
        return img, boxes, gt_classes_str

In [None]:
# Build the dataset from ./VOC2COCO/train2017 (split defaults to 'train').
pascal_data = PASCAL_DATASET('./VOC2COCO', 'train2017')

In [None]:
# Step through the dataset with an iterator and take the first sample;
# presumably this is the (img, boxes, gt_classes_str) tuple that
# PASCAL_DATASET.__getitem__ returns for index 0.
pascal_it = iter(pascal_data)
first_data = next(pascal_it)

In [None]:
# Display the first sample as the cell output.
first_data

In [None]:
def draw_bb_coco(img, boxes, classes, red_only=False):
    """Show `img` with one rectangle outlined per COCO-style bounding box.

    Args:
        img: image handed to ``ax.imshow``.
        boxes: iterable of boxes indexed as [x, y, width, height], where
            (x, y) is the upper left corner.
        classes: class name of each box, paired with `boxes` by position.
        red_only: if true, outline every box in red; otherwise use the
            VOC2007_COLORS entry at the class's index in VOC2007_CLASSES.
    """
    _, axis = plt.subplots(1)
    for box, class_name in zip(boxes, classes):
        corner = (box[0], box[1])  # the upper left point
        width, height = box[2], box[3]
        if red_only:
            color = 'r'
        else:
            color = VOC2007_COLORS[VOC2007_CLASSES.index(class_name)]
        outline = patches.Rectangle(
            corner,
            width,
            height,
            linewidth=3,
            edgecolor=color,
            facecolor='none',
        )
        axis.add_patch(outline)
    axis.imshow(img)


In [None]:
# Draw the first sample's boxes, all in red (red_only=True).
draw_bb_coco(first_data[0], first_data[1], first_data[2], True)

In [None]:
# Advance the iterator to the next sample, print it, and draw its boxes
# using the per-class colors (red_only keeps its default of False).
next_data = next(pascal_it)
print(next_data)
draw_bb_coco(next_data[0], next_data[1], next_data[2])