In [1]:
from pyquery import PyQuery as pq
from collections import namedtuple
from pathlib import Path

In [160]:
# Lightweight record types shared by the conversion helpers.

# Corner coordinates of one bounding box, as read from the annotation file.
Coor = namedtuple('Coor', 'xmin ymin xmax ymax')

# One box expressed as centre point and extent, plus the class label it
# belongs to.
NormCoor = namedtuple('NormCoor', 'class_name xcenter ycenter width height')

# Width and height of an image.
Size = namedtuple('Size', 'w h')

# Everything extracted from a single annotation file: the image name, its
# Size, and a list of Coor entries.
Box = namedtuple('Box', 'name size coor_lst')

In [161]:
def get_cordinates(filename):
    """Read one annotation XML file and collect the boxes of its own class.

    The class of interest is the text of the document's ``folder`` element.
    Only ``object`` entries whose first ``name`` element equals that text
    contribute a box; all other objects are ignored.

    Args:
        filename: Location of the XML file, passed to PyQuery's ``filename=``
            argument.

    Returns:
        The result of ``wrap`` applied to
        ``[image_name, [width, height], [xmin, ymin, xmax, ymax], ...]``.

    Raises:
        AssertionError: If a matching object does not contain exactly one
            ``bndbox`` element.
    """
    doc = pq(filename=filename, parser='xml')

    def first_text(node, tag):
        # Text of the first element matching `tag` beneath `node`.
        return node(tag)[0].text

    image_name = first_text(doc, 'filename')
    dims = [int(first_text(doc, 'width')), int(first_text(doc, 'height'))]  # [width, height]
    target_class = first_text(doc, 'folder')

    raw_list = [image_name, dims]
    for obj in doc('object'):
        node = pq(obj)
        if first_text(node, 'name') != target_class:
            continue  # object of a different class: not exported
        bndbox = node('bndbox')
        assert len(bndbox) == 1
        corners = [int(first_text(bndbox, tag))
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        raw_list.append(corners)
    return wrap(raw_list)

def wrap(raw_list):
    """Turn the flat list built by the XML reader into a ``Box`` record.

    Args:
        raw_list: Sequence laid out as
            ``[name, [width, height], [xmin, ymin, xmax, ymax], ...]``;
            there may be zero or more trailing corner lists.

    Returns:
        A ``Box`` whose ``size`` is a ``Size`` and whose ``coor_lst`` is a
        list of ``Coor`` records, one per trailing entry.
    """
    image_name = raw_list[0]
    image_size = Size(*raw_list[1])
    corner_records = [Coor(*corners) for corners in raw_list[2:]]
    return Box(name=image_name, size=image_size, coor_lst=corner_records)

In [165]:
def normalize_coor(box):
    """Convert a box's corner coordinates to centre/extent fractions.

    For every entry of ``box.coor_lst`` the centre point and the extent are
    computed and divided by the image width (x values) or height (y values)
    taken from ``box.size``. The class label is the part of ``box.name``
    before its first underscore.

    Args:
        box: Record exposing ``name``, ``size`` (with ``w`` and ``h``) and
            ``coor_lst`` (entries with ``xmin``, ``ymin``, ``xmax``, ``ymax``).

    Returns:
        A list of ``NormCoor`` records in the same order as ``box.coor_lst``;
        empty when there are no coordinates.
    """
    return [
        NormCoor(
            box.name.split("_")[0],
            (c.xmin + c.xmax) / 2 / box.size.w,
            (c.ymin + c.ymax) / 2 / box.size.h,
            (c.xmax - c.xmin) / box.size.w,
            (c.ymax - c.ymin) / box.size.h,
        )
        for c in box.coor_lst
    ]
        

In [183]:
def write_to_txt(bb_list, path):
    """Write normalized boxes to a text file, one box per line.

    Each line has the form
    ``<class_name> <xcenter> <ycenter> <width> <height>``.

    Args:
        bb_list: Iterable of records exposing ``class_name``, ``xcenter``,
            ``ycenter``, ``width`` and ``height`` attributes. May be empty.
        path: Destination file (``str`` or ``Path``). It is opened in ``"w"``
            mode, so existing content is replaced.

    Notes:
        * An empty ``bb_list`` produces an empty file. Indexing the first
          element to obtain the label would raise ``IndexError`` for an
          annotation that yielded no boxes.
        * Each line carries the ``class_name`` of its own record rather than
          the label of the first record, so lists with mixed labels are
          written correctly. For lists sharing one label the output is the
          same either way.
    """
    path = Path(path)
    with path.open("w") as fout:
        fout.writelines(
            f"{coor.class_name} {coor.xcenter} {coor.ycenter} {coor.width} {coor.height}\n"
            for coor in bb_list
        )
            
            

In [199]:
# Convert every annotation XML found one directory level below bb/ into a
# text label file inside bbox/ (created if missing).
p = Path('bbox')
p.mkdir(parents=True, exist_ok=True)
bb_p = Path('bb')
for old_path in bb_p.glob('*/*.xml'):
    # Append ".txt" to the complete stem. Calling with_suffix() on the stem
    # would replace whatever follows the stem's last dot, so "img.v2.xml"
    # would be written as "img.txt" and could overwrite another file.
    filename = p / f"{old_path.stem}.txt"
    box = get_cordinates(old_path)
    bb_list = normalize_coor(box)
    write_to_txt(bb_list, filename)


    