In [1]:
import os
import random
import time
import xml.etree.ElementTree as ET

import cv2
import matplotlib.pyplot as plt
import numpy as np
import timm
import torch
import torch.nn as nn
import torchvision
from PIL import Image
from sklearn.model_selection import train_test_split

from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def extract_data_from_xml(rootdir, xml_file):
    tree = ET.parse(os.path.join(rootdir, xml_file))
    root = tree.getroot()
    data = []

    img_paths = []
    img_sizes = []
    img_labels = []
    bboxs = []

    for img in root:
        bbs_of_img = []
        labels_of_img = []

        for bbs in img.findall('taggedRectangles'):
            for bb in bbs.findall('taggedRectangle'):
                x = float(bb.attrib['x'])
                y = float(bb.attrib['y'])
                w = float(bb.attrib['width'])
                h = float(bb.attrib['height'])
                bbs_of_img.append([x, y, w, h])
                labels_of_img.append(bb[0].text.lower())

        img_paths.append(os.path.join(rootdir, img[0].text))
        img_labels.append(labels_of_img)
        img_sizes.append([float(img[1].attrib['x']),
                         float(img[1].attrib['y'])])
        bboxs.append(bbs_of_img)

    return img_paths, img_sizes, img_labels, bboxs


img_paths, img_sizes, img_labels, bboxs = extract_data_from_xml(
    'icdar2003/SceneTrialTrain', 'words.xml')
print("img__paths", img_paths[0])
print("img_sizes", img_sizes[0])
print("img_labels", img_labels[0])
print("bboxs", bboxs[0])

img__paths icdar2003/SceneTrialTrain\apanar_06.08.2002/IMG_1261.JPG
img_sizes [1600.0, 1200.0]
img_labels ['self', 'adhesive', 'address', 'labels', '36', '89m', 'cls', '250', 'on', 'a', 'roll']
bboxs [[174.0, 392.0, 274.0, 195.0], [512.0, 391.0, 679.0, 183.0], [184.0, 612.0, 622.0, 174.0], [863.0, 599.0, 446.0, 187.0], [72.0, 6.0, 95.0, 87.0], [247.0, 2.0, 197.0, 88.0], [792.0, 0.0, 115.0, 81.0], [200.0, 848.0, 228.0, 139.0], [473.0, 878.0, 165.0, 109.0], [684.0, 878.0, 71.0, 106.0], [806.0, 844.0, 218.0, 141.0]]


### Chuẩn bị bộ dữ liệu:

In [3]:
def split_bounding_boxes(img_paths, img_labels, bboxes, save_dir):
    image_dir = os.path.join(save_dir, "images")
    os.makedirs(image_dir, exist_ok=True)
    for i in range(len(img_paths)):
        img = Image.open(img_paths[i])

        labels =[]
        count = 0
        for j in range(len(img_labels[i])):
            bboxs = bboxes[i][j]
            label = img_labels[i][j]
            left = int(bboxs[0])
            upper = int(bboxs[1])
            right = int(bboxs[0] + bboxs[2])
            bottom = int(bboxs[1] + bboxs[3])
            
            

            img_crop = img.crop((left, upper, right, bottom))
            # Bỏ qua ảnh nếu kích thước nhỏ hơn 10x10
            if img_crop.size[0] < 10 or img_crop.size[1] < 10:
                continue

            img_save_path = os.path.join(image_dir, f"{count:06d}.png")
            img_crop.save(img_save_path)

            label = img_save_path + "\t" + label
            labels.append(label)
            
            count += 1
        print(f"Created {count} images")

        # Write labels to a text file
        labels_txt_path = os.path.join(save_dir, "labels.txt")

        with open(labels_txt_path, "w") as f:
            for label in labels:
                f.write(f"{label}\n")

save_dir = "datasets/ocr_dataset"
split_bounding_boxes(img_paths, img_labels, bboxs, save_dir)

Created 11 images
Created 1 images
Created 10 images
Created 18 images
Created 8 images
Created 8 images
Created 12 images
Created 15 images
Created 7 images
Created 15 images
Created 3 images
Created 9 images
Created 4 images
Created 4 images
Created 1 images
Created 14 images
Created 1 images
Created 9 images
Created 9 images
Created 8 images
Created 4 images
Created 12 images
Created 42 images
Created 6 images
Created 2 images
Created 3 images
Created 1 images
Created 3 images
Created 6 images
Created 18 images
Created 2 images
Created 11 images
Created 14 images
Created 11 images
Created 3 images
Created 8 images
Created 6 images
Created 1 images
Created 2 images
Created 1 images
Created 3 images
Created 4 images
Created 8 images
Created 4 images
Created 6 images
Created 4 images
Created 4 images
Created 10 images
Created 6 images
Created 3 images
Created 3 images
Created 2 images
Created 4 images
Created 1 images
Created 2 images
Created 4 images
Created 4 images
Created 1 images
