### Trích xuất thông tin từ XML

In [9]:
import xml.etree.ElementTree as ET
import os

In [20]:
def extract_data_from_xml(rootdir, xml_file):
    """Read image paths, sizes, word labels and word boxes from an XML file.

    The file ``os.path.join(rootdir, xml_file)`` is parsed and every direct
    child of the root element is treated as one image record:

    * the text of its first child is the image file name (joined to
      ``rootdir`` to build the image path);
    * the ``x`` and ``y`` attributes of its second child are the image size;
    * each ``taggedRectangles/taggedRectangle`` element is one word box with
      ``x``, ``y``, ``width`` and ``height`` attributes, and the text of the
      rectangle's first child is the word's transcription.

    Args:
        rootdir: Directory containing the XML file and the images.
        xml_file: Name of the annotation file inside ``rootdir``.

    Returns:
        A tuple ``(img_paths, img_sizes, img_labels, bboxs)`` of four lists
        with one entry per image record, in document order:

        * ``img_paths``: image path strings;
        * ``img_sizes``: ``[x, y]`` float pairs;
        * ``img_labels``: lists of lower-cased transcriptions;
        * ``bboxs``: lists of ``[x, y, w, h]`` float boxes, parallel to the
          matching ``img_labels`` entry.

    Raises:
        OSError: If the XML file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        KeyError: If a required attribute is missing.
        IndexError: If an expected child element is missing.
        ValueError: If a numeric attribute cannot be converted to float.
    """
    root = ET.parse(os.path.join(rootdir, xml_file)).getroot()

    img_paths = []
    img_sizes = []
    img_labels = []
    bboxs = []

    for img in root:
        bbs_of_img = []
        labels_of_img = []

        for bbs in img.findall('taggedRectangles'):
            for bb in bbs.findall('taggedRectangle'):
                x = float(bb.attrib['x'])
                y = float(bb.attrib['y'])
                w = float(bb.attrib['width'])
                h = float(bb.attrib['height'])
                bbs_of_img.append([x, y, w, h])
                # An empty tag element has text None; record it as '' so the
                # label list stays aligned with the box list instead of
                # aborting the whole parse with AttributeError.
                labels_of_img.append((bb[0].text or '').lower())

        img_paths.append(os.path.join(rootdir, img[0].text))
        img_labels.append(labels_of_img)
        img_sizes.append([float(img[1].attrib['x']),
                          float(img[1].attrib['y'])])
        bboxs.append(bbs_of_img)

    return img_paths, img_sizes, img_labels, bboxs
# Parse the training annotations. The dataset directory is assembled with
# os.path.join instead of a literal containing a backslash: "\S" is not a
# valid string escape (newer Python versions warn about it) and a hard-coded
# backslash only works as a path separator on Windows.
img_paths, img_sizes, img_labels, bboxs = extract_data_from_xml(
    os.path.join('icdar2003', 'SceneTrialTrain'), 'words.xml')

# Show the first image's record as a quick sanity check of the parse.
print("img__paths",img_paths[0])
print("img_sizes",img_sizes[0])
print("img_labels",img_labels[0])
print("bboxs",bboxs[0])

img__paths icdar2003\SceneTrialTrain\apanar_06.08.2002/IMG_1261.JPG
img_sizes [1600.0, 1200.0]
img_labels ['self', 'adhesive', 'address', 'labels', '36', '89m', 'cls', '250', 'on', 'a', 'roll']
bboxs [[174.0, 392.0, 274.0, 195.0], [512.0, 391.0, 679.0, 183.0], [184.0, 612.0, 622.0, 174.0], [863.0, 599.0, 446.0, 187.0], [72.0, 6.0, 95.0, 87.0], [247.0, 2.0, 197.0, 88.0], [792.0, 0.0, 115.0, 81.0], [200.0, 848.0, 228.0, 139.0], [473.0, 878.0, 165.0, 109.0], [684.0, 878.0, 71.0, 106.0], [806.0, 844.0, 218.0, 141.0]]


### Chuyển đổi sang định dạng YOLOv11

In [22]:
def _bbox_to_yolo_label(bbox, w_img, h_img):
    """Format one ``[x, y, w, h]`` box as a normalized YOLO label string."""
    left = bbox[0]
    top = bbox[1]
    box_w = bbox[2]
    box_h = bbox[3]

    # Box origin plus half its extent gives the centre; every value is then
    # divided by the matching image dimension.
    cx = (left + box_w / 2) / w_img
    cy = (top + box_h / 2) / h_img
    norm_w = box_w / w_img
    norm_h = box_h / h_img

    # Single-class dataset: every box gets class id 0.
    class_id = 0
    return f"{class_id} {cx} {cy} {norm_w} {norm_h}"


def convert_to_yolo_format(img_paths, img_sizes, bboxs):
    """Turn per-image ``[x, y, w, h]`` boxes into YOLO label strings.

    Args:
        img_paths: Image paths, indexed in parallel with ``bboxs``.
        img_sizes: ``(width, height)`` pairs, indexed in parallel with
            ``bboxs``.
        bboxs: For each image, a list of ``[x, y, w, h]`` boxes.

    Returns:
        A list with one ``(img_path, labels)`` tuple per entry of ``bboxs``,
        where ``labels`` is a list of strings of the form
        ``"0 x_center y_center w h"`` with all four values divided by the
        image width or height.
    """
    converted = []
    for idx, boxes in enumerate(bboxs):
        w_img, h_img = img_sizes[idx]
        labels = [_bbox_to_yolo_label(box, w_img, h_img) for box in boxes]
        converted.append((img_paths[idx], labels))
    return converted

# Convert every parsed image's boxes to YOLO label strings, paired with the image path.
yolo_data = convert_to_yolo_format(img_paths, img_sizes, bboxs)
# Bare expression: displays the first (img_path, labels) entry when run as a notebook cell.
yolo_data[0]

('icdar2003\\SceneTrialTrain\\apanar_06.08.2002/IMG_1261.JPG',
 ['0 0.194375 0.40791666666666665 0.17125 0.1625',
  '0 0.5321875 0.40208333333333335 0.424375 0.1525',
  '0 0.309375 0.5825 0.38875 0.145',
  '0 0.67875 0.5770833333333333 0.27875 0.15583333333333332',
  '0 0.0746875 0.04125 0.059375 0.0725',
  '0 0.2159375 0.03833333333333333 0.123125 0.07333333333333333',
  '0 0.5309375 0.03375 0.071875 0.0675',
  '0 0.19625 0.7645833333333333 0.1425 0.11583333333333333',
  '0 0.3471875 0.7770833333333333 0.103125 0.09083333333333334',
  '0 0.4496875 0.7758333333333334 0.044375 0.08833333333333333',
  '0 0.571875 0.7620833333333333 0.13625 0.1175'])

### Lưu vào folder mới