In [3]:
import xml.etree.ElementTree as ET
import glob
import os
import json


def xml_to_yolo_bbox(bbox, w, h):
    """Turn a corner-style box into a centre-style box scaled by ``w`` and ``h``.

    Args:
        bbox: indexable whose positions 0-3 hold xmin, ymin, xmax, ymax.
        w: divisor applied to the horizontal values (x centre and width).
        h: divisor applied to the vertical values (y centre and height).

    Returns:
        ``[x_center, y_center, width, height]`` as a list.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    return [
        ((right + left) / 2) / w,   # x centre
        ((bottom + top) / 2) / h,   # y centre
        (right - left) / w,         # box width
        (bottom - top) / h,         # box height
    ]


def yolo_to_xml_bbox(bbox, w, h):
    """Turn a centre-style box scaled by ``w``/``h`` back into integer corners.

    Args:
        bbox: indexable whose positions 0-3 hold x_center, y_center, width,
            height.
        w: factor the horizontal values are multiplied by.
        h: factor the vertical values are multiplied by.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` as a list of ints, each rounded to the
        nearest integer.
    """
    # x_center, y_center, width, height
    x_center_px = bbox[0] * w
    y_center_px = bbox[1] * h
    w_half_len = (bbox[2] * w) / 2
    h_half_len = (bbox[3] * h) / 2
    # Round to nearest instead of truncating: products such as 0.29 * 100
    # evaluate to 28.999999999999996, and int() would then move the corner
    # one pixel too far towards zero.
    xmin = int(round(x_center_px - w_half_len))
    ymin = int(round(y_center_px - h_half_len))
    xmax = int(round(x_center_px + w_half_len))
    ymax = int(round(y_center_px + h_half_len))
    return [xmin, ymin, xmax, ymax]


# Class names in order of first appearance; a label's position in this list
# is the class index written to the YOLO files.
classes = []
input_dir = "annotations/"
output_dir = "labels/"
image_dir = "images/"

# Create the labels folder (output directory). exist_ok=True keeps the cell
# re-runnable, whereas a plain os.mkdir raises once the folder exists.
os.makedirs(output_dir, exist_ok=True)

# Identify all the xml files in the annotations folder (input directory).
files = glob.glob(os.path.join(input_dir, '*.xml'))

# Convert each annotation file in turn.
for fil in files:
    basename = os.path.basename(fil)
    # splitext strips only the final extension, so a name that itself
    # contains dots keeps everything before ".xml".
    filename = os.path.splitext(basename)[0]

    # Skip annotations that have no matching .png in the image folder.
    if not os.path.exists(os.path.join(image_dir, f"{filename}.png")):
        print(f"{filename} image does not exist!")
        continue

    result = []

    # Parse the xml file and read the image size used for normalisation.
    tree = ET.parse(fil)
    root = tree.getroot()
    width = int(root.find("size").find("width").text)
    height = int(root.find("size").find("height").text)
    if width <= 0 or height <= 0:
        # A non-positive size would make the normalisation divide by zero
        # (or flip signs); skip this file instead of aborting the whole run.
        print(f"{filename} has invalid image size {width}x{height}, skipped!")
        continue

    for obj in root.findall('object'):
        label = obj.find("name").text
        # Register classes the first time they are seen.
        if label not in classes:
            classes.append(label)
        index = classes.index(label)
        # Read the corners by tag name rather than by child position so the
        # result does not depend on the order of the elements inside <bndbox>.
        # float() accepts both integer and decimal coordinate text.
        bndbox = obj.find("bndbox")
        pil_bbox = [float(bndbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
        yolo_bbox = xml_to_yolo_bbox(pil_bbox, width, height)
        # One line per object: "<class index> <x_center> <y_center> <width> <height>".
        bbox_string = " ".join([str(x) for x in yolo_bbox])
        result.append(f"{index} {bbox_string}")

    if result:
        # Write one YOLO text file per xml file that contained objects.
        with open(os.path.join(output_dir, f"{filename}.txt"), "w", encoding="utf-8") as f:
            f.write("\n".join(result))

# Write the class list (as a JSON array) for reference.
with open('classes.txt', 'w', encoding='utf8') as f:
    f.write(json.dumps(classes))

1971167142-357273695-KX-H1H3-10-20220314DRAFTPAPERWORKGOEH2 image does not exist!
82075757-82075745-KX-H1H3-1-GLOBALORCANICS001000010900000038961FCLWEEK10 image does not exist!
KX-A0I4-14-shipmentId-BL235000996683-ETD21 image does not exist!
KX-A0I4-15-shipmentId-BL235000770103-ETD21 image does not exist!
KX-A0I4-28-BL235001042292-ETD18 image does not exist!
KX-A0I4-29-BL235001089221-ETD18 image does not exist!
KX-A0I4-30-BL235001028265-ETD18 image does not exist!
KX-A0I4-TEMP-560-ELEOCIPL_BMOU4831175 image does not exist!
KX-B7H7-26-Shippingdocuemnts-CHN image does not exist!
KX-B7H7-41-2872699INVOICEPACKINGLISTPOWHS-14556-6496296SHEETSETCINCINNA image does not exist!
KX-B7H7-46-2020 image does not exist!
KX-B7H7-47-2020 image does not exist!
KX-B7H7-49-2903906INVOICEPACKINGLISTPOWHS-14556-6496295SHEETSETCINCINNATI image does not exist!
KX-B7H7-56-2961752AINVOICEPACKINGLISTPOWHS-14556-6496326SHEETSETLACONT image does not exist!
KX-C8B5-8-shipmentId-204491309XinhuiJBHUPackingListCommer

In [14]:
import shutil

# Stems (text before the first dot of the file name) of every copied label.
# NOTE(review): a label whose name contains extra dots collapses to a shorter
# key here; any code that looks names up in `f` must derive them the same way.
f = []
dr = "D:/l/"

# Make sure the destination folder exists before copying into it.
os.makedirs(dr, exist_ok=True)

# Copy every label file into the destination folder and remember its stem.
for doc in glob.glob("C:\\Users\\KartikayGupta\\Xml To Yolo\\L\\labels\\*.txt"):
    ls = doc.split("\\")[-1].split(".")
    filename = ls[0]
    f.append(filename)
    shutil.copy(doc, dr)
    
    





In [15]:
# Stems of all label files, taken straight from the labels folder so this
# cell does not depend on a list built in another cell. splitext removes only
# the final extension, so names that contain dots are compared in full.
label_stems = {
    os.path.splitext(os.path.basename(label_path))[0]
    for label_path in glob.glob("C:\\Users\\KartikayGupta\\Xml To Yolo\\L\\labels\\*.txt")
}

# Copy each image whose stem matches a label file into the destination folder.
for doc in glob.glob("C:\\Users\\KartikayGupta\\Xml To Yolo\\L\\images\\*.png"):
    filename = os.path.splitext(os.path.basename(doc))[0]
    if filename in label_stems:
        shutil.copy(doc, dr)