In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import os
import glob

In [2]:
# Files found under each class folder; they are parsed as XML further down.
# glob.glob() gives no ordering guarantee, so each list is sorted to keep the
# row order of the resulting table reproducible across platforms and runs.
Ally_xml_paths = sorted(glob.glob('data/Ally/xml/*'))
Enemy_xml_paths = sorted(glob.glob('data/Enemy/xml/*'))

In [15]:
def get_label(obj):
    """Return the text of the ``<name>`` child of an annotation element.

    Args:
        obj: An element exposing ``find`` (e.g. an ElementTree ``<object>``).

    Returns:
        The ``text`` attribute of the first ``<name>`` child.

    Raises:
        AttributeError: If ``find('name')`` returns ``None`` because the
            element has no ``<name>`` child.
    """
    name_node = obj.find('name')
    return name_node.text

def get_name(path):
    """Build the relative path ``<folder>/<prefix>/<file>`` for an image path.

    ``<folder>`` is the directory that directly contains the file, ``<file>``
    is the file name, and ``<prefix>`` is the second-to-last underscore
    separated piece of the file name (``left`` for ``left_0.png``).

    Args:
        path: Image path string; components may be separated by backslashes,
            forward slashes, or a mix of both.

    Returns:
        The three pieces joined with ``os.path.join``.

    Raises:
        IndexError: If ``path`` has no parent directory component or the file
            name contains no underscore.
    """
    # Treat both separator styles alike so annotations written on any OS work;
    # splitting on the backslash alone fails for forward-slash paths.
    parts = path.replace('\\', '/').split('/')
    folder, filename = parts[-2], parts[-1]
    prefix = filename.split('_')[-2]
    return os.path.join(folder, prefix, filename)

def get_box(obj):
    """Return ``[xmin, ymin, xmax, ymax]`` as ints from an element's ``<bndbox>``.

    Args:
        obj: An element exposing ``findall`` (e.g. an ElementTree ``<object>``).

    Returns:
        A four-item list of ints read from the ``<xmin>``, ``<ymin>``,
        ``<xmax>`` and ``<ymax>`` children of the bounding box. When the
        element holds several ``<bndbox>`` children the last one is used.

    Raises:
        ValueError: If the element has no ``<bndbox>`` child, or a coordinate
            text cannot be converted by ``int``.
        AttributeError: If the bounding box lacks one of the four coordinate
            children.
    """
    bboxs = obj.findall('bndbox')
    if not bboxs:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError('annotation object has no <bndbox> element')

    bbox = bboxs[-1]
    return [int(bbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

def to_numerical(label):
    """Translate a class-name string into its integer class code.

    The codes are 1 for ``ally_robot``, 2 for ``ally_armor``, 3 for
    ``enemy_robot`` and 4 for ``enemy_armor``.

    Args:
        label: One of the four class-name strings above.

    Returns:
        The integer code for ``label``.

    Raises:
        KeyError: If ``label`` is not one of the four known names.
    """
    # Position in this tuple (counting from 1) is the numeric code.
    class_names = ('ally_robot', 'ally_armor', 'enemy_robot', 'enemy_armor')
    code_by_name = {name: code for code, name in enumerate(class_names, start=1)}
    return code_by_name[label]

In [17]:
def _annotation_rows(xml_path):
    """Parse one annotation XML file into bounding-box rows.

    Args:
        xml_path: Path of an XML file holding a ``<path>`` element and zero or
            more ``<object>`` elements.

    Returns:
        A list with one ``[path, label, xmin, ymin, xmax, ymax]`` entry per
        ``<object>``; the list is empty when the file has no objects.
    """
    tree = ET.parse(xml_path)
    image_path = get_name(tree.find('path').text)
    return [
        [image_path, get_label(obj), *get_box(obj)]
        for obj in tree.findall('object')
    ]


# The Ally and Enemy files are handled identically, so one loop covers both
# lists (Ally first, then Enemy). Rows are gathered in a plain list and the
# DataFrame is built once at the end: this avoids repeated pd.concat calls and
# lets a file without any <object> contribute zero rows instead of failing.
rows = []
for xml_path in Ally_xml_paths + Enemy_xml_paths:
    rows.extend(_annotation_rows(xml_path))

# One row per annotated object; the index is a fresh 0..n-1 range.
new_df = pd.DataFrame(rows, columns=['path', 'label', 'xmin', 'ymin', 'xmax', 'ymax'])

In [19]:
# Work on a copy of the collected rows with a clean 0..n-1 index.
bbox_df = new_df.reset_index(drop=True)

# Add the integer class code that corresponds to each row's label string.
bbox_df['num_label'] = list(map(to_numerical, bbox_df['label'].values))

# Last expression of the cell: show the finished table.
bbox_df

Unnamed: 0,path,label,xmin,ymin,xmax,ymax,num_label
0,Ally\left\left_0.png,ally_robot,280,51,749,606,1
1,Ally\left\left_0.png,ally_armor,445,427,584,504,2
2,Ally\left\left_1.png,ally_armor,513,448,632,519,2
3,Ally\left\left_1.png,ally_robot,302,55,914,668,1
4,Ally\left\left_10.png,ally_robot,571,156,958,527,1
...,...,...,...,...,...,...,...
470,Enemy\right\right_7.png,enemy_armor,264,393,334,441,4
471,Enemy\right\right_8.png,enemy_robot,583,170,921,524,3
472,Enemy\right\right_8.png,enemy_armor,671,385,739,432,4
473,Enemy\right\right_9.png,enemy_robot,563,145,961,514,3


In [74]:
# Persist the table as CSV; the row index is left out of the file.
output_csv = 'data/bbox_df.csv'
bbox_df.to_csv(output_csv, index=False)