In [2]:
import os
import xml.etree.ElementTree as ET
import glob
from tqdm import tqdm
import shutil
import yaml
from ultralytics import YOLO

# Path configuration: the project root is the parent of the current working directory
PROJECT_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
RAW_DIR, PROCESSED_DIR = (
    os.path.join(DATA_DIR, sub_dir) for sub_dir in ("raw", "processed")
)
MODEL_DIR = os.path.join(PROJECT_DIR, "models")

# Create the processed-data and models folders if they do not exist yet
for folder in (PROCESSED_DIR, MODEL_DIR):
    os.makedirs(folder, exist_ok=True)

# Convert one XML annotation file into a YOLO-format label file
def convert_annotation(xml_file, class_map, raw_dir=None, processed_dir=None):
    """Write the YOLO label file that corresponds to one XML annotation.

    The XML is expected to contain a ``size`` element with ``width`` and
    ``height``, and ``object`` elements each holding a ``name`` and a
    ``bndbox`` with ``xmin``/``xmax``/``ymin``/``ymax`` (Pascal VOC-style
    layout). For every object whose name is a key of ``class_map`` one line
    ``<class_id> <x_center> <y_center> <width> <height>`` is written, with
    the four box values divided by the image width/height.

    Args:
        xml_file: Path of the XML annotation to read.
        class_map: Mapping from class name to integer class id. Objects
            whose name is not in the mapping are skipped.
        raw_dir: Root folder the annotation lives under. Defaults to the
            module-level ``RAW_DIR``.
        processed_dir: Root folder the label is written under, mirroring the
            annotation's location relative to ``raw_dir``. Defaults to the
            module-level ``PROCESSED_DIR``.

    Returns:
        None. The label file is created (or overwritten) on disk; a file is
        produced even when no object matches ``class_map``.
    """
    if raw_dir is None:
        raw_dir = RAW_DIR
    if processed_dir is None:
        processed_dir = PROCESSED_DIR

    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Swap only the file extension; a plain str.replace('.xml', '.txt') would
    # also rewrite any '.xml' that happens to appear in a directory name.
    label_file = os.path.splitext(xml_file)[0] + '.txt'

    # Re-root the label under processed_dir using a real path computation
    # instead of substring replacement, so relative or differently-spelled
    # paths still map correctly.
    try:
        rel_path = os.path.relpath(label_file, raw_dir)
    except ValueError:
        # os.path.relpath raises on Windows when the paths are on different drives.
        rel_path = None

    outside_raw = (
        rel_path is None
        or rel_path == os.pardir
        or rel_path.startswith(os.pardir + os.sep)
    )
    # An annotation outside raw_dir keeps its label next to the XML, which is
    # what the previous substring replacement did in that case.
    out_file = label_file if outside_raw else os.path.join(processed_dir, rel_path)

    # Make sure the destination folder exists so the function works on its own.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(out_file, 'w') as f:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in class_map:
                continue
            cls_id = class_map[cls]
            xmlbox = obj.find('bndbox')
            xmin = float(xmlbox.find('xmin').text)
            xmax = float(xmlbox.find('xmax').text)
            ymin = float(xmlbox.find('ymin').text)
            ymax = float(xmlbox.find('ymax').text)
            # Box centre and box size, each normalised by the image dimensions.
            x_center = (xmin + xmax) / 2 / w
            y_center = (ymin + ymax) / 2 / h
            box_w = (xmax - xmin) / w
            box_h = (ymax - ymin) / h
            f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}\n")

# Gather every class name that appears in the training annotations
classes = set()
train_xml_pattern = os.path.join(RAW_DIR, "train", "*.xml")
for xml_file in glob.glob(train_xml_pattern):
    root = ET.parse(xml_file).getroot()
    classes.update(obj.find('name').text for obj in root.iter('object'))

# Assign consecutive ids to the class names in sorted order
class_map = {}
for idx, cls in enumerate(sorted(classes)):
    class_map[cls] = idx

# Convert the annotations and copy the images for each dataset split
for subset in ('train', 'valid'):
    src_dir = os.path.join(RAW_DIR, subset)
    dst_dir = os.path.join(PROCESSED_DIR, subset)
    os.makedirs(dst_dir, exist_ok=True)

    # XML annotations -> YOLO label files
    xml_files = glob.glob(os.path.join(src_dir, "*.xml"))
    for xml_file in tqdm(xml_files, desc=f"Đang chuyển đổi annotations {subset}"):
        convert_annotation(xml_file, class_map)

    # Copy the .jpg images into the processed split folder
    img_files = glob.glob(os.path.join(src_dir, "*.jpg"))
    for img_file in tqdm(img_files, desc=f"Đang sao chép ảnh {subset}"):
        shutil.copy(img_file, dst_dir)

# Write the dataset.yaml file that describes the processed dataset
dataset_yaml_path = os.path.join(PROCESSED_DIR, 'dataset.yaml')
dataset_config = dict(
    path=PROCESSED_DIR,
    train=os.path.join(PROCESSED_DIR, 'train'),
    val=os.path.join(PROCESSED_DIR, 'valid'),
    nc=len(class_map),
    # Iterating the dict yields the class names in insertion order
    names=list(class_map),
)

with open(dataset_yaml_path, 'w') as f:
    yaml.dump(dataset_config, f)

print("Chuẩn bị dataset hoàn tất.")

# Load the pre-trained YOLOv5 model (nano variant)
model = YOLO('yolov5n.pt')

# Training settings, gathered in one place
train_args = {
    'data': os.path.join(PROCESSED_DIR, 'dataset.yaml'),
    'epochs': 50,
    'imgsz': 640,
    'batch': 16,
    'workers': 2,
    'device': 'cpu',  # Train on the CPU instead of a GPU
    'patience': 10,
    'save': True,
    'project': MODEL_DIR,
    'name': 'number_detection',
    'optimizer': 'Adam',
    'lr0': 0.001,
    'weight_decay': 0.0005,
    'warmup_epochs': 3,
    'mosaic': 0.5,
    'cache': False,
}
model.train(**train_args)

# Evaluate the model
results = model.val()
print(results)

# Save the trained model
saved_model_path = os.path.join(MODEL_DIR, 'number_detection_best.pt')
model.save(saved_model_path)

print("Hoàn tất training. Model đã được lưu tại:", MODEL_DIR)

Đang chuyển đổi annotations train: 100%|██████████| 1032/1032 [00:00<00:00, 1458.92it/s]
Đang sao chép ảnh train: 100%|██████████| 1032/1032 [00:07<00:00, 146.89it/s]
Đang chuyển đổi annotations valid: 100%|██████████| 99/99 [00:00<00:00, 109.11it/s]
Đang sao chép ảnh valid: 100%|██████████| 99/99 [00:00<00:00, 145.76it/s]

Chuẩn bị dataset hoàn tất.
PRO TIP  Replace 'model=yolov5n.pt' with new 'model=yolov5nu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.






New https://pypi.org/project/ultralytics/8.3.11 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.10  Python-3.12.6 torch-2.4.1+cpu CPU (11th Gen Intel Core(TM) i7-1185G7 3.00GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov5n.pt, data=d:\Coding\number-detection-yolo\data\processed\dataset.yaml, epochs=50, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=2, project=d:\Coding\number-detection-yolo\models, name=number_detection, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, a

[34m[1mtrain: [0mScanning D:\Coding\number-detection-yolo\data\processed\train... 1032 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1032/1032 [00:03<00:00, 303.85it/s]


[34m[1mtrain: [0mNew cache created: D:\Coding\number-detection-yolo\data\processed\train.cache


[34m[1mval: [0mScanning D:\Coding\number-detection-yolo\data\processed\valid... 99 images, 0 backgrounds, 0 corrupt: 100%|██████████| 99/99 [00:00<00:00, 358.95it/s]


[34m[1mval: [0mNew cache created: D:\Coding\number-detection-yolo\data\processed\valid.cache
Plotting labels to d:\Coding\number-detection-yolo\models\number_detection\labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.001, momentum=0.937) with parameter groups 75 weight(decay=0.0), 82 weight(decay=0.0005), 81 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1md:\Coding\number-detection-yolo\models\number_detection[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G      2.103      4.518      1.953        112        640:   6%|▌         | 4/65 [00:19<04:46,  4.69s/it]