## PASCAL VOC Dataset 
- Annotations/ .xml 파일들
    - <size> : 이미지의 width, height, channels 정보
    - <width>,<height>,<depth> : 이미지의 width, height, channels
    - <object>, <name> : 이미지속 object의 정보, 클래스이름을 의미
    - <bndbox> : 해당 object의 바운딩상자의 정보
        - <xmin>,<ymin> : 바운딩 박스 좌측 상단 꼭짓점의 x, y 좌표값
        - <xmax>,<ymax> : 바운딩 박스 우측 하단 꼭짓점의 x, y 좌표값

### VOC Annotations Dataset .xml 구조

In [None]:
<annotation>
    <folder>VOC2007</folder>
    <filename>000001.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>341012865</flickrid>
    </source>
    <owner>
        <flickrid>Fried Camels</flickrid>
        <name>Jinky the Fruit Bat</name>
    </owner>
    <size>
        <width>353</width>
        <height>500</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>dog</name>
        <pose>Left</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>48</xmin>
            <ymin>240</ymin>
            <xmax>195</xmax>
            <ymax>371</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Left</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>8</xmin>
            <ymin>12</ymin>
            <xmax>352</xmax>
            <ymax>498</ymax>
        </bndbox>
    </object>
</annotation>

## XML Package를 이용한 Parsing

In [1]:
import sys
import os
import xml.etree.ElementTree as Et
from xml.etree.ElementTree import Element, ElementTree

# 1. Locate the annotation XML file.
xml_root_path = '../../datasets/VOCdevkit/VOC2012/'
xml_annotation_path = 'Annotations/2008_000200.xml'

print("XML parsing Start\n")

# 2. Parse the XML tree and read the image width, height and depth.
# The file is opened in binary mode so the XML parser decodes the bytes
# itself, and through a context manager so the handle is closed as soon as
# parsing has finished (the parsed tree stays usable afterwards).
with open(xml_root_path + xml_annotation_path, "rb") as xml:
    tree = Et.parse(xml)
root = tree.getroot()

# The values below are the raw element text, i.e. strings, not integers.
size = root.find("size")
width = size.find("width").text
height = size.find("height").text
channels = size.find("depth").text

print("Image properties\nwidth : {}\nheight : {}\nchannels : {}\n".format(width, height, channels))

# 3. For every <object>, read its class name and the bounding-box
# coordinates xmin, ymin, xmax, ymax.
objects = root.findall("object")
print("Objects Description")
for _object in objects:
    name = _object.find("name").text
    bndbox = _object.find("bndbox")
    xmin = bndbox.find("xmin").text
    ymin = bndbox.find("ymin").text
    xmax = bndbox.find("xmax").text
    ymax = bndbox.find("ymax").text

    print("class : {}\nxmin : {}\nymin : {}\nxmax : {}\nymax : {}\n".format(name, xmin, ymin, xmax, ymax))

print("XML parsing END")

XML parsing Start

Image properties
width : 500
height : 375
channels : 3

Objects Description
class : person
xmin : 119
ymin : 76
xmax : 184
ymax : 311

class : person
xmin : 266
ymin : 43
xmax : 338
ymax : 323

XML parsing END


## XML Package를 이용한 Parsing 2
- os.walk로 얻은 파일 리스트를 순회하면서 xml 파싱

In [4]:
import sys
import os
import xml.etree.ElementTree as Et
from xml.etree.ElementTree import Element, ElementTree

# 1. Set up the dataset paths.
xml_root_path = '../../datasets/VOCdevkit/VOC2012/'
IMAGE_FOLDER = "JPEGImages"
ANNOTATIONS_FOLDER = "Annotations"

# Take the first tuple produced by os.walk for the annotation folder:
# (directory path, sub-directory names, file names).
ann_root, ann_dir, ann_files = next(os.walk(
    os.path.join(xml_root_path, ANNOTATIONS_FOLDER)))

# Only the first three listed files are parsed; the listing is not sorted,
# so which files these are depends on the order os.walk returns them in.
for xml_file in ann_files[:3]:
    # 2. Parse the XML tree and read the image width, height and depth.
    # Binary mode lets the XML parser decode the bytes itself; the context
    # manager closes each handle instead of leaking one per iteration.
    with open(os.path.join(ann_root, xml_file), "rb") as xml:
        tree = Et.parse(xml)
    root = tree.getroot()

    # The values below are the raw element text, i.e. strings.
    size = root.find("size")
    width = size.find("width").text
    height = size.find("height").text
    channels = size.find("depth").text

    print("Image properties\nwidth : {}\nheight : {}\nchannels : {}\n".format(width, height, channels))

    # 3. For every <object>, read its class name and the bounding-box
    # coordinates xmin, ymin, xmax, ymax.
    objects = root.findall("object")
    print("Objects Description")
    for _object in objects:
        name = _object.find("name").text
        bndbox = _object.find("bndbox")
        xmin = bndbox.find("xmin").text
        ymin = bndbox.find("ymin").text
        xmax = bndbox.find("xmax").text
        ymax = bndbox.find("ymax").text

        print("class : {}\nxmin : {}\nymin : {}\nxmax : {}\nymax : {}\n".format(name, xmin, ymin, xmax, ymax))

    print("XML parsing END")


Image properties
width : 500
height : 375
channels : 3

Objects Description
class : person
xmin : 119
ymin : 76
xmax : 184
ymax : 311

class : person
xmin : 266
ymin : 43
xmax : 338
ymax : 323

XML parsing END
Image properties
width : 500
height : 333
channels : 3

Objects Description
class : person
xmin : 21
ymin : 1
xmax : 439
ymax : 333

XML parsing END
Image properties
width : 500
height : 371
channels : 3

Objects Description
class : person
xmin : 60
ymin : 52
xmax : 197
ymax : 371

class : person
xmin : 169
ymin : 133
xmax : 277
ymax : 371

class : person
xmin : 214
ymin : 79
xmax : 298
ymax : 371

class : person
xmin : 268
ymin : 60
xmax : 368
ymax : 371

class : person
xmin : 339
ymin : 27
xmax : 446
ymax : 371

XML parsing END


## Pascal VOC Dataset Image load

In [9]:
import sys
import os
import xml.etree.ElementTree as Et
from xml.etree.ElementTree import Element, ElementTree
from PIL import ImageDraw
from PIL import Image


# 1. Set up the dataset paths.
xml_root_path = '../../datasets/VOCdevkit/VOC2012/'
IMAGE_FOLDER = "JPEGImages"
ANNOTATIONS_FOLDER = "Annotations"

# Take the first tuple produced by os.walk for each folder:
# (directory path, sub-directory names, file names).
ann_root, ann_dir, ann_files = next(os.walk(
    os.path.join(xml_root_path, ANNOTATIONS_FOLDER)))
img_root, amg_dir, img_files = next(os.walk(
    os.path.join(xml_root_path, IMAGE_FOLDER)))

# Only the first listed annotation file is processed.
for xml_file in ann_files[:1]:
    # The annotation and its image share a base name, so only the extension
    # is swapped. splitext strips just the final extension, which keeps
    # file names that contain additional dots intact.
    img_name = os.path.splitext(xml_file)[0] + ".jpg"
    if img_name not in img_files:
        # Same exception type the previous list.index() lookup raised.
        raise ValueError("{!r} is not in the image folder listing".format(img_name))
    img_file = os.path.join(img_root, img_name)

    # convert() returns a new, fully loaded image, so the handle opened by
    # Image.open can be released right away.
    with Image.open(img_file) as opened_image:
        image = opened_image.convert("RGB")
    print('Image properties\n',image.size, '\n')

    # 2. Parse the XML tree and read the image width, height and depth.
    # Binary mode lets the XML parser decode the bytes itself; the context
    # manager closes the handle once parsing has finished.
    with open(os.path.join(ann_root, xml_file), "rb") as xml:
        tree = Et.parse(xml)
    root = tree.getroot()

    # The values below are the raw element text, i.e. strings.
    size = root.find("size")
    width = size.find("width").text
    height = size.find("height").text
    channels = size.find("depth").text

    print("Image properties\nwidth : {}\nheight : {}\nchannels : {}\n".format(width, height, channels))

    # 3. For every <object>, read its class name and the bounding-box
    # coordinates xmin, ymin, xmax, ymax.
    objects = root.findall("object")
    print("Objects Description")
    for _object in objects:
        name = _object.find("name").text
        bndbox = _object.find("bndbox")
        xmin = bndbox.find("xmin").text
        ymin = bndbox.find("ymin").text
        xmax = bndbox.find("xmax").text
        ymax = bndbox.find("ymax").text

        print("class : {}\nxmin : {}\nymin : {}\nxmax : {}\nymax : {}\n".format(name, xmin, ymin, xmax, ymax))

    print("XML parsing END")


Image properties
 (500, 375) 

Image properties
width : 500
height : 375
channels : 3

Objects Description
class : person
xmin : 119
ymin : 76
xmax : 184
ymax : 311

class : person
xmin : 266
ymin : 43
xmax : 338
ymax : 323

XML parsing END
