In [1]:
import json
import os
import shutil
import xml.etree.ElementTree as ET

import torch
from sklearn.model_selection import train_test_split
from ultralytics import YOLO

In [2]:
# Report the available accelerators and pick the device used for training.
print("Number of GPU: ", torch.cuda.device_count())

# get_device_name() raises when no CUDA device is present, so only query it
# when CUDA is actually usable; the CPU fallback below then still works.
if torch.cuda.is_available():
    print("GPU Name: ", torch.cuda.get_device_name())
else:
    print("GPU Name: ", "(no CUDA device available)")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 3050 Laptop GPU
Using device: cuda


In [3]:
# Parse the annotation XML into four parallel lists, one entry per image:
# relative image path, (x, y) size read from the image's second child element,
# lower-cased word labels, and [x, y, width, height] boxes.
word_xml_path = "Dataset/words.xml"
root = ET.parse(word_xml_path).getroot()

img_paths = []
img_sizes = []
img_labels = []
bounding_boxes = []

for img in root:
    bbox_word = []
    label_word = []
    for words in img.findall("taggedRectangles"):
        for word in words:
            text = word[0].text
            # ElementTree reports the text of an empty element as None, which
            # would make .isalnum() raise; treat it like any other unusable tag.
            if not text or not text.isalnum():
                continue
            # Lower-case once and reuse it for both the filter and the label.
            text = text.lower()
            # str.isalnum() accepts accented letters, so drop these explicitly.
            if "é" in text or "ñ" in text:
                continue
            bbox_word.append([
                float(word.attrib["x"]),
                float(word.attrib["y"]),
                float(word.attrib["width"]),
                float(word.attrib["height"])
            ])
            label_word.append(text)

    # Images whose words were all filtered out are still recorded, with empty
    # label and box lists, so the four lists stay aligned.
    img_paths.append(img[0].text)
    img_sizes.append((int(img[1].attrib["x"]), int(img[1].attrib["y"])))
    img_labels.append(label_word)
    bounding_boxes.append(bbox_word)

print(img_paths[0], img_sizes[0], img_labels[0], bounding_boxes[0])

apanar_06.08.2002/IMG_1261.JPG (1600, 1200) ['self', 'adhesive', 'address', 'labels', '36', '89m', 'cls', '250', 'on', 'a', 'roll'] [[174.0, 392.0, 274.0, 195.0], [512.0, 391.0, 679.0, 183.0], [184.0, 612.0, 622.0, 174.0], [863.0, 599.0, 446.0, 187.0], [72.0, 6.0, 95.0, 87.0], [247.0, 2.0, 197.0, 88.0], [792.0, 0.0, 115.0, 81.0], [200.0, 848.0, 228.0, 139.0], [473.0, 878.0, 165.0, 109.0], [684.0, 878.0, 71.0, 106.0], [806.0, 844.0, 218.0, 141.0]]


In [4]:
def convert_to_yolov8_format(img_paths, img_sizes, bounding_boxes):
    """Turn pixel-space boxes into normalised YOLO label lines.

    Args:
        img_paths: image paths, one per image.
        img_sizes: (width, height) pairs aligned with ``img_paths``.
        bounding_boxes: per image, a list of [x, y, width, height] boxes where
            (x, y) is the top-left corner in pixels.

    Returns:
        A list of ``(img_path, labels)`` tuples. Each label is the string
        ``"0 <center_x> <center_y> <width> <height>"`` with all four values
        divided by the image size; the class id is always 0.
    """
    def _label_line(box, width, height):
        # Shift the top-left corner to the box centre, then normalise.
        left, top, box_w, box_h = box
        center_x = (left + box_w / 2) / width
        center_y = (top + box_h / 2) / height
        return f"0 {center_x} {center_y} {box_w / width} {box_h / height}"

    return [
        (path, [_label_line(box, width, height) for box in boxes])
        for path, (width, height), boxes in zip(img_paths, img_sizes, bounding_boxes)
    ]

# Convert every parsed image, then show the first (image path, label lines)
# pair as a quick sanity check of the normalised values.
yolov8_data = convert_to_yolov8_format(img_paths, img_sizes, bounding_boxes)
yolov8_data[0]

('apanar_06.08.2002/IMG_1261.JPG',
 ['0 0.194375 0.40791666666666665 0.17125 0.1625',
  '0 0.5321875 0.40208333333333335 0.424375 0.1525',
  '0 0.309375 0.5825 0.38875 0.145',
  '0 0.67875 0.5770833333333333 0.27875 0.15583333333333332',
  '0 0.0746875 0.04125 0.059375 0.0725',
  '0 0.2159375 0.03833333333333333 0.123125 0.07333333333333333',
  '0 0.5309375 0.03375 0.071875 0.0675',
  '0 0.19625 0.7645833333333333 0.1425 0.11583333333333333',
  '0 0.3471875 0.7770833333333333 0.103125 0.09083333333333334',
  '0 0.4496875 0.7758333333333334 0.044375 0.08833333333333333',
  '0 0.571875 0.7620833333333333 0.13625 0.1175'])

In [5]:
# Two-stage, seeded split. Note that the second split is applied to the
# hold-out, not to the training part, so the effective shares are roughly
# 80% train / 17.5% validation / 2.5% test.
seed = 1
vaid_rate = 0.2    # share of all samples held out of training
test_rate = 0.125  # share of that hold-out that becomes the test set

split_options = {"random_state": seed, "shuffle": True}
train_data, remain_data = train_test_split(
    yolov8_data, test_size=vaid_rate, **split_options
)
valid_data, test_data = train_test_split(
    remain_data, test_size=test_rate, **split_options
)

# Display the size of each subset.
tuple(len(subset) for subset in (train_data, valid_data, test_data))

(200, 43, 7)

In [6]:
def save_data(data, scr_img_dir, save_dir):
    """Copy each image and write its label file under ``save_dir``.

    Args:
        data: iterable of ``(img_path, labels)`` pairs, where ``img_path`` is
            relative to ``scr_img_dir`` and ``labels`` is a list of label lines.
        scr_img_dir: directory the image paths are resolved against.
        save_dir: output directory; images go to ``save_dir/images`` and one
            ``<image stem>.txt`` per image goes to ``save_dir/labels``.
    """
    images_dir = os.path.join(save_dir, "images")
    labels_dir = os.path.join(save_dir, "labels")
    for folder in (save_dir, images_dir, labels_dir):
        os.makedirs(folder, exist_ok=True)

    for relative_path, label_lines in data:
        # Images are flattened into images_dir: only the file name is kept.
        shutil.copy(os.path.join(scr_img_dir, relative_path), images_dir)

        stem, _extension = os.path.splitext(os.path.basename(relative_path))
        with open(os.path.join(labels_dir, stem + ".txt"), "w") as label_file:
            label_file.writelines(f"{line}\n" for line in label_lines)

# Write each subset into its own folder under Dataset/yolo_data
# (train / val / test), reading the source images from "Dataset".
yolo_data_root = "Dataset/yolo_data"
os.makedirs(yolo_data_root, exist_ok=True)

save_train_dir, save_valid_dir, save_test_dir = (
    os.path.join(yolo_data_root, split_name)
    for split_name in ("train", "val", "test")
)

for split_data, split_dir in (
    (train_data, save_train_dir),
    (valid_data, save_valid_dir),
    (test_data, save_test_dir),
):
    save_data(split_data, "Dataset", split_dir)

In [7]:
yaml_path = os.path.join("Dataset/yolo_data", "data.yaml")

# No cell in this notebook writes the dataset description, so a fresh
# Restart & Run All would fail at model.train(). Create a minimal one when it
# is missing; an existing file is left untouched.
if not os.path.exists(yaml_path):
    dataset_root = os.path.abspath("Dataset/yolo_data")
    os.makedirs(dataset_root, exist_ok=True)
    with open(yaml_path, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(
            # json.dumps yields a double-quoted string that is also valid YAML,
            # which keeps backslashes and spaces in the path intact.
            f"path: {json.dumps(dataset_root, ensure_ascii=False)}\n"
            "train: train/images\n"
            "val: val/images\n"
            "test: test/images\n"
            "names:\n"
            # Every label line written by this notebook uses class id 0.
            "  0: text\n"
        )

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the YOLOv8-small architecture and start from the pretrained weights.
model = YOLO("yolov8s.yaml").load("yolov8s.pt").to(device)

img_size = 512
batch = 2

torch.cuda.empty_cache()

results = model.train(
    data=yaml_path,
    epochs=100,
    imgsz=img_size,
    batch= batch,
    device=device
)


In [8]:
# Load the best checkpoint of a specific training run and evaluate it.
# The path is built with os.path.join so it resolves on every OS; the
# previous hard-coded backslashes only worked on Windows.
# NOTE(review): "train42" is one particular run folder; a re-run of the
# training cell will presumably write to a different folder — confirm.
model_path = os.path.join("runs", "detect", "train42", "weights", "best.pt")
model = YOLO(model=model_path)

model.val()

In [9]:
# Run the loaded model on one validation image and show the detections.
# os.path.join keeps the path portable; the previous hard-coded backslashes
# only resolved on Windows.
img_path = os.path.join("Dataset", "yolo_data", "val", "images", "from_joanne.jpg")
test = model.predict(img_path, show=True)