In [1]:
import pandas as pd
import numpy as np
import os
import xml.etree.ElementTree as et

In [2]:
# Directory expected to hold the image files (not referenced by the code visible
# in this notebook section -- presumably used by later cells; TODO confirm).
IMAGE_PATH = './images/'
# Directory containing the XML annotation files, one file per image.
# Keep the trailing slash so a file name can be appended to it directly.
XML_PATH = './annotations/'

In [3]:
def get_data_from_xml(file, xml_dir=None):
    """Parse one XML annotation file into a single-row DataFrame.

    The file is expected to contain a ``filename`` element, a ``size``
    element with ``width``/``height`` children, and zero or more ``object``
    elements, each with a ``name`` and a ``bndbox`` holding
    ``xmin``/``ymin``/``xmax``/``ymax``.

    Parameters
    ----------
    file : str
        Name of the annotation file, relative to ``xml_dir``.
    xml_dir : str, optional
        Directory containing the file. Defaults to the module-level
        ``XML_PATH``. A trailing path separator is not required.

    Returns
    -------
    pandas.DataFrame
        Exactly one row with the columns ``xmins``, ``xmaxs``, ``ymins``,
        ``ymaxs``, ``width``, ``height``, ``filename`` and ``labels``.
        The box columns and ``labels`` each hold one Python list with one
        entry per ``object`` element (empty lists when there are none).

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML.
    AttributeError
        If a required element (``filename``, ``size``, ``name``, ``bndbox``
        or one of the coordinates) is missing.
    ValueError
        If a size or coordinate value is not an integer literal.
    """
    if xml_dir is None:
        # Resolved at call time so a later change to XML_PATH is honoured.
        xml_dir = XML_PATH

    # os.path.join works whether or not the directory ends with a separator.
    xroot = et.parse(os.path.join(xml_dir, file)).getroot()

    filename = xroot.find('filename').text
    size = xroot.find('size')
    width, height = int(size.find('width').text), int(size.find('height').text)

    labels = []
    xmins, ymins, xmaxs, ymaxs = [], [], [], []
    for elem in xroot.findall('object'):
        label = elem.find('name').text
        # Incorrectly worn masks are folded into the 'without_mask' class.
        labels.append('without_mask' if label == 'mask_weared_incorrect' else label)
        box = elem.find('bndbox')
        xmins.append(int(box.find('xmin').text))
        ymins.append(int(box.find('ymin').text))
        xmaxs.append(int(box.find('xmax').text))
        ymaxs.append(int(box.find('ymax').text))

    # Every value is wrapped in a one-element list so the per-object lists
    # end up inside a single cell instead of being expanded into rows.
    return pd.DataFrame({
        'xmins': [xmins], 'xmaxs': [xmaxs],
        'ymins': [ymins], 'ymaxs': [ymaxs],
        'width': [width], 'height': [height],
        'filename': [filename],
        'labels': [labels],
    })

def get_dataframe_from_xmls():
    """Parse every XML annotation file in ``XML_PATH`` into one DataFrame.

    Files are processed in sorted name order and each contributes the rows
    returned by ``get_data_from_xml``. Only regular files whose name ends in
    ``.xml`` (case-insensitive) are parsed, so stray directory entries such
    as ``.ipynb_checkpoints`` or ``.DS_Store`` do not abort the run.

    Returns
    -------
    pandas.DataFrame
        The per-file rows stacked with a fresh 0..n-1 index, or an empty
        DataFrame when the directory contains no annotation files.
    """
    files = sorted(
        name for name in os.listdir(XML_PATH)
        if name.lower().endswith('.xml')
        and os.path.isfile(os.path.join(XML_PATH, name))
    )
    # Collect first and concatenate once: concatenating inside the loop
    # re-copies all accumulated rows on every iteration.
    frames = [get_data_from_xml(file) for file in files]
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [4]:
# Parse all annotation files into one table (one row per annotation file).
data = get_dataframe_from_xmls()
# Persist the table as a semicolon-separated CSV without the row index.
# NOTE(review): the box and label columns hold Python lists, which to_csv
# writes as their string form -- a reader will have to parse them back
# (e.g. with ast.literal_eval) after loading.
data.to_csv('./image_data.csv', sep=';', index=False)