# voc2012 dataset download

In [5]:
import os
import urllib.request
import zipfile
import tarfile

In [None]:
# Mount Google Drive at /content/drive; the data and weight directories used
# by the following cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [20]:
# Directory that receives the downloaded dataset archive and its contents.
data_dir = "/content/drive/MyDrive/Image/data/"
# makedirs also creates missing parent folders (os.mkdir raises
# FileNotFoundError when the parent does not exist yet), and exist_ok=True
# makes re-running this cell a no-op.
os.makedirs(data_dir, exist_ok=True)

In [21]:
# Directory that receives the downloaded model weight files.
weights_dir = "/content/drive/MyDrive/Image/weight/"
# makedirs also creates missing parent folders (os.mkdir raises
# FileNotFoundError when the parent does not exist yet), and exist_ok=True
# makes re-running this cell a no-op.
os.makedirs(weights_dir, exist_ok=True)

In [22]:
# Download the VOC2012 train/val archive into data_dir and unpack it there.
# Nothing happens when the archive file is already present.
url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
target_path = os.path.join(data_dir, "VOCtrainval_11-May-2012.tar")

if not os.path.exists(target_path):
    # Fetch under a temporary name and rename only after the transfer has
    # finished, so an interrupted download never leaves a truncated archive
    # that a later run would mistake for a complete one.
    partial_path = target_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, target_path)

    # The with-block closes the archive even when extraction raises.
    with tarfile.open(target_path) as tar:
        tar.extractall(data_dir)
    

In [23]:
# MIT License
# Copyright (c) 2017 Max deGroot, Ellis Brown
# https://github.com/amdegroot/ssd.pytorch
    
# Download the vgg16_reducedfc.pth weight file into weights_dir unless it is
# already there.
url = "https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth"
target_path = os.path.join(weights_dir, "vgg16_reducedfc.pth")

if not os.path.exists(target_path):
    # Fetch under a temporary name and rename only after the transfer has
    # finished, so an interrupted download never leaves a truncated file
    # that a later run would mistake for a complete one.
    partial_path = target_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, target_path)

In [24]:
# MIT License
# Copyright (c) 2017 Max deGroot, Ellis Brown
# https://github.com/amdegroot/ssd.pytorch

# Download the ssd300_mAP_77.43_v2.pth weight file into weights_dir unless it
# is already there.
url = "https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth"
target_path = os.path.join(weights_dir, "ssd300_mAP_77.43_v2.pth")

if not os.path.exists(target_path):
    # Fetch under a temporary name and rename only after the transfer has
    # finished, so an interrupted download never leaves a truncated file
    # that a later run would mistake for a complete one.
    partial_path = target_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, target_path)

In [1]:
import os.path as osp
import random
import xml.etree.ElementTree as ET

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.utils.data as data

%matplotlib inline

In [2]:
# Seed the PyTorch, NumPy and standard-library random number generators with
# the same fixed value.
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(1234)

# data path list 작성

- SSD dataset : Image classification data + annotation data 
> data annotation : dataset에 메타데이터를 추가하는 작업 
> : '태그'(주석) 형식으로 이미지, 텍스트, 비디오 등 거의 모든 데이터에 추가할 수 있다. -> 한국에서는 '라벨링'이라는 표현으로 많이 쓰인다.
> 즉, 인공지능이 데이터의 내용을 이해할 수 있도록 주석을 달아 주는 작업이라고 보면 된다.

- annotation data를 사용하는 이유는 annotation technique 중 하나인 BBox(Bounding Box)를 사용하기 때문이다. (대표적인 annotation technique으로는 BBox, polygon, cuboid, semantic segmentation 등이 있다.)

---

- SSD에서는 image data와 함께 annotation data도 같이 처리해야 한다.
> : image 정보가 바뀌면 BBox의 정보도 함께 변경해야 한다.



In [3]:
def _paths_for_split(rootpath, split):
    """Return (image_paths, annotation_paths) for one image-set split."""
    id_list_path = osp.join(rootpath, 'ImageSets', 'Main', split + '.txt')

    # The with-block closes the id list file as soon as it has been read.
    with open(id_list_path, encoding='utf-8') as id_file:
        stripped = (line.strip() for line in id_file)
        # Blank lines would otherwise turn into bogus ".jpg" / ".xml" paths.
        file_ids = [file_id for file_id in stripped if file_id]

    # Join the id directly rather than going through a '%s' template, so a
    # '%' character inside rootpath cannot break the formatting.
    img_paths = [osp.join(rootpath, 'JPEGImages', file_id + '.jpg')
                 for file_id in file_ids]
    anno_paths = [osp.join(rootpath, 'Annotations', file_id + '.xml')
                  for file_id in file_ids]
    return img_paths, anno_paths


def make_datapath_list(rootpath):
    """Build the image and annotation path lists for the train and val splits.

    The file ids are read from ``ImageSets/Main/train.txt`` and
    ``ImageSets/Main/val.txt`` under ``rootpath``; every id is mapped to
    ``JPEGImages/<id>.jpg`` and ``Annotations/<id>.xml``.

    Args:
        rootpath: Dataset root directory containing the ``JPEGImages``,
            ``Annotations`` and ``ImageSets`` folders. A trailing path
            separator is optional.

    Returns:
        A 4-tuple ``(train_img_list, train_anno_list, val_img_list,
        val_anno_list)`` of lists of path strings, each in the order of the
        corresponding id list file.

    Raises:
        OSError: If one of the id list files cannot be opened.
    """
    train_img_list, train_anno_list = _paths_for_split(rootpath, 'train')
    val_img_list, val_anno_list = _paths_for_split(rootpath, 'val')

    return train_img_list, train_anno_list, val_img_list, val_anno_list

In [25]:
# Build the train / val path lists from the extracted dataset folder and show
# the first training image path.
rootpath = "/content/drive/MyDrive/Image/data/VOCdevkit/VOC2012/"
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(rootpath)

print(train_img_list[0])

/content/drive/MyDrive/Image/data/VOCdevkit/VOC2012/JPEGImages/2008_000008.jpg


## XML to list (annotation data)

In [26]:
class Anno_xml2list(object):
    """Convert one XML annotation file into an array of normalized boxes.

    Args:
        classes: Sequence of class names; the position of a name in this
            sequence is used as its label index.
    """

    def __init__(self, classes):
        self.classes = classes

    def __call__(self, xml_path, width, height):
        """Parse ``xml_path`` and return its bounding boxes.

        Args:
            xml_path: Path (or file object) of the XML annotation.
            width: Image width used to normalize the x coordinates.
            height: Image height used to normalize the y coordinates.

        Returns:
            A float array of shape ``(num_objects, 5)`` whose rows are
            ``[xmin, ymin, xmax, ymax, label_ind]``. Objects flagged
            ``difficult == 1`` are left out. When no object remains the
            array has shape ``(0, 5)``, so column slicing still works.

        Raises:
            ValueError: If an object name is not in ``self.classes``.
        """
        rows = []
        root = ET.parse(xml_path).getroot()

        # Each coordinate tag is paired with the extent it is divided by.
        tag_extents = (('xmin', width), ('ymin', height),
                       ('xmax', width), ('ymax', height))

        for obj in root.iter('object'):
            # An object without a <difficult> tag is treated as not difficult.
            difficult_node = obj.find('difficult')
            if difficult_node is not None and int(difficult_node.text) == 1:
                continue

            name = obj.find('name').text.lower().strip()
            bbox = obj.find('bndbox')

            row = []
            for tag, extent in tag_extents:
                # int(float(...)) also accepts coordinates written as "51.0".
                # The -1 shifts the origin to 0 (presumably the annotations
                # use 1-based pixel coordinates -- TODO confirm).
                pixel = int(float(bbox.find(tag).text)) - 1
                row.append(pixel / extent)

            row.append(self.classes.index(name))
            rows.append(row)

        # reshape keeps the column count at 5 even when rows is empty.
        return np.array(rows, dtype=np.float64).reshape(-1, 5)

In [27]:
# The 20 class names; a label index is the position of the name in this list.
voc_classes = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

transform_anno = Anno_xml2list(voc_classes)

# Try the converter on one validation sample: load the image to get its size,
# then convert the matching annotation file.
ind = 1
image_file_path = val_img_list[ind]
img = cv2.imread(image_file_path)
height, width, channels = img.shape

# Kept as the last expression so the notebook displays the resulting array.
transform_anno(val_anno_list[ind], width, height)

array([[ 0.09      ,  0.03003003,  0.998     ,  0.996997  , 18.        ],
       [ 0.122     ,  0.56756757,  0.164     ,  0.72672673, 14.        ]])

# References
- 만들면서 배우는 파이토치 딥러닝, 오가와 유타로 저, 박광수 옮김, 한빛미디어 (2021)