In [69]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import glob

In [70]:
Ally_xml_paths = glob.glob('../input/rmua-data/label_img/labeled/labeled_Ally/*')
Enemy_xml_paths = glob.glob('../input/rmua-data/label_img/labeled/labeled_Enemy/*')

In [71]:
def get_label(obj):
    return obj.find('name').text

def get_name(path):
    return ('/').join(path.split('\\')[-2:])

def get_box(obj):
    bboxs = obj.findall('bndbox')
    for bbox in bboxs:
        xmin = bbox.find('xmin').text
        ymin = bbox.find('ymin').text
        xmax = bbox.find('xmax').text
        ymax = bbox.find('ymax').text
    return [int(xmin), int(ymin), int(xmax), int(ymax)]

def to_numerical(label):
    str2num = {
        'ally_robot': 1,
        'ally_armor': 2,
        'enemy_robot': 3,
        'enemy_armor': 4,
    }
    
    return str2num[label]

In [72]:
new_df = pd.DataFrame()

for xml_path in Ally_xml_paths:
    labels = []
    boxes = []
    paths = []
    temp_df = pd.DataFrame()
    
    tree = ET.parse(xml_path)
    objs = tree.findall('object')
    path = tree.find('path').text
    paths.append(get_name(path))
    
    for obj in objs:
        labels.append(get_label(obj))
        boxes.append(get_box(obj))
    
    labels = np.array(labels)
    boxes = np.array(boxes)
    
    temp_df['path'] = paths * len(boxes)
    temp_df['label'] = labels
    
    temp_df['xmin'] = boxes[:, 0]
    temp_df['ymin'] = boxes[:, 1]
    temp_df['xmax'] = boxes[:, 2]
    temp_df['ymax'] = boxes[:, 3]
    
    new_df = pd.concat([new_df, temp_df], axis=0)
    
for xml_path in Enemy_xml_paths:
    labels = []
    boxes = []
    paths = []
    temp_df = pd.DataFrame()
    
    tree = ET.parse(xml_path)
    objs = tree.findall('object')
    path = tree.find('path').text
    paths.append(get_name(path))
    
    for obj in objs:
        labels.append(get_label(obj))
        boxes.append(get_box(obj))
    
    labels = np.array(labels)
    boxes = np.array(boxes)
    
    temp_df['path'] = paths * len(boxes)
    temp_df['label'] = labels
    
    temp_df['xmin'] = boxes[:, 0]
    temp_df['ymin'] = boxes[:, 1]
    temp_df['xmax'] = boxes[:, 2]
    temp_df['ymax'] = boxes[:, 3]
    
    new_df = pd.concat([new_df, temp_df], axis=0)

In [73]:
bbox_df = new_df.reset_index(drop=True)
bbox_df['num_label'] = [to_numerical(label) for label in bbox_df['label'].values]
bbox_df

Unnamed: 0,path,label,xmin,ymin,xmax,ymax,num_label
0,Ally/left_25.png,ally_robot,637,212,857,465,1
1,Ally/left_25.png,ally_armor,711,369,769,404,2
2,Ally/left_3.png,ally_robot,339,53,927,670,1
3,Ally/left_3.png,ally_armor,534,456,701,552,2
4,Ally/left_11.png,ally_robot,572,134,976,521,1
...,...,...,...,...,...,...,...
470,Enemy/left_33.png,enemy_armor,645,352,673,376,4
471,Enemy/right_29.png,enemy_robot,647,173,924,484,3
472,Enemy/right_29.png,enemy_armor,764,392,831,434,4
473,Enemy/left_35.png,enemy_robot,637,231,826,422,3


In [74]:
bbox_df.to_csv('bbox_df.csv', index=False)