In [2]:
import os
import pandas as pd
import xml.etree.ElementTree as ET

In [3]:
bbox_file = "./docs/bbox_annotations.xml"

In [4]:
tabela_classes = {
    0: "acerola",
    1: "lemon",
    2: "cherry_tomato",
    3: "khaki",
    4: "banana",
    5: "lime",
    6: "clove_lemon",
    7: "avocado",
    8: "bergamot",
    9: "pear"
}

In [5]:
def carregar_anotacoes(caminho_do_xml):

    if not os.path.exists(caminho_do_xml):
        print(f"ERRO: Arquivo de anotações não encontrado em '{caminho_do_xml}'")
        return None

    tree = ET.parse(caminho_do_xml)
    root = tree.getroot()
    anotacoes = {}

    for image_elem in root.findall('image'):
        filename = image_elem.get('name')
        box_elem = image_elem.find('box')
        if filename is not None and box_elem is not None:
            xmin = int(float(box_elem.get('xtl')))
            ymin = int(float(box_elem.get('ytl')))
            xmax = int(float(box_elem.get('xbr')))
            ymax = int(float(box_elem.get('ybr')))
            anotacoes[filename] = {
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax
            }

    return anotacoes

In [6]:
anotacoes = carregar_anotacoes(bbox_file)
print(anotacoes)

{'0-01-V1-B.png': {'xmin': 125, 'ymin': 209, 'xmax': 206, 'ymax': 289}, '0-01-V1-W.png': {'xmin': 132, 'ymin': 218, 'xmax': 210, 'ymax': 288}, '0-01-V2-B.png': {'xmin': 127, 'ymin': 193, 'xmax': 219, 'ymax': 272}, '0-01-V2-W.png': {'xmin': 134, 'ymin': 194, 'xmax': 231, 'ymax': 282}, '0-02-V1-B.png': {'xmin': 117, 'ymin': 180, 'xmax': 220, 'ymax': 279}, '0-02-V1-W.png': {'xmin': 118, 'ymin': 211, 'xmax': 217, 'ymax': 302}, '0-02-V2-B.png': {'xmin': 125, 'ymin': 172, 'xmax': 204, 'ymax': 263}, '0-02-V2-W.png': {'xmin': 131, 'ymin': 191, 'xmax': 200, 'ymax': 277}, '0-03-V1-B.png': {'xmin': 126, 'ymin': 199, 'xmax': 209, 'ymax': 280}, '0-03-V1-W.png': {'xmin': 118, 'ymin': 188, 'xmax': 219, 'ymax': 288}, '0-03-V2-B.png': {'xmin': 136, 'ymin': 191, 'xmax': 202, 'ymax': 270}, '0-03-V2-W.png': {'xmin': 134, 'ymin': 204, 'xmax': 212, 'ymax': 296}, '0-04-V1-B.png': {'xmin': 123, 'ymin': 208, 'xmax': 187, 'ymax': 275}, '0-04-V1-W.png': {'xmin': 131, 'ymin': 212, 'xmax': 210, 'ymax': 295}, '0-04

In [7]:
def extrair_class_id(file_name):
    return int(file_name.split('-')[0])

def extrair_class_name(class_id):
    return tabela_classes.get(class_id, "unknown")

In [8]:
df = pd.DataFrame([
    {
        "file_name": file_name,
        "class_name": extrair_class_name(extrair_class_id(file_name)),
        "xmin": data["xmin"],
        "ymin": data["ymin"],
        "xmax": data["xmax"],
        "ymax": data["ymax"]
    }
    for file_name, data in anotacoes.items()
])

print(df)

         file_name class_name  xmin  ymin  xmax  ymax
0    0-01-V1-B.png    acerola   125   209   206   289
1    0-01-V1-W.png    acerola   132   218   210   288
2    0-01-V2-B.png    acerola   127   193   219   272
3    0-01-V2-W.png    acerola   134   194   231   282
4    0-02-V1-B.png    acerola   117   180   220   279
..             ...        ...   ...   ...   ...   ...
195  9-04-V2-W.png       pear    97   125   249   315
196  9-05-V1-B.png       pear   107   136   247   282
197  9-05-V1-W.png       pear    91   156   250   304
198  9-05-V2-B.png       pear    96   124   245   352
199  9-05-V2-W.png       pear   103   116   244   337

[200 rows x 6 columns]


In [9]:
df.to_csv("./docs/dataset_bbox.csv", index=False)