In [1]:
import glob
import json
import os
import random
import shutil
import xml.etree.ElementTree as ET

import torch
from PIL import Image
from ultralytics import YOLO

In [2]:
# Report the runtime environment: CPU thread count, PyTorch build and GPU details
print("INFO:\n")
print(f"Logical CPU threads available: {os.cpu_count()}")
print(f"PyTorch version: {torch.__version__}")

# Keep the CUDA check in a variable; the training cell reads it again
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    # These queries are only made when a CUDA device was detected
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Device: {torch.cuda.get_device_name(0)}")


INFO:

Logical CPU threads available: 16
PyTorch version: 2.7.0+cu118
CUDA available: True
CUDA version: 11.8
Device: NVIDIA GeForce RTX 4070 SUPER


In [3]:
IMAGE_DIR = "images"                 # Source folder holding the PNG images
XML_DIR = "annotations"              # Source folder holding the XML annotations
OUTPUT_DIR = "yolo_dataset"          # Destination folder for the prepared dataset

# Build the <OUTPUT_DIR>/{images,labels}/{train,val} layout
for content_kind in ("images", "labels"):
    for split_name in ("train", "val"):
        os.makedirs(os.path.join(OUTPUT_DIR, content_kind, split_name), exist_ok=True)

In [4]:
# Gather the distinct <object>/<name> values across every annotation file.
# The sorted order defines each class's numeric id (its position in the list).
class_names = sorted({
    obj.find('name').text
    for xml_file in glob.glob(os.path.join(XML_DIR, "*.xml"))
    for obj in ET.parse(xml_file).getroot().findall('object')
})

print(f"Found {len(class_names)} unique brick classes: {class_names}")

Found 200 unique brick classes: ['10247', '11090', '11211', '11212', '11214', '11458', '11476', '11477', '14704', '14719', '14769', '15068', '15070', '15100', '15379', '15392', '15535', '15573', '15712', '18651', '18654', '18674', '18677', '20482', '22388', '22885', '2357', '2412b', '2420', '24201', '24246', '2429', '2430', '2431', '2432', '2436', '2445', '2450', '2454', '2456', '24866', '25269', '2540', '26047', '2654', '26601', '26603', '26604', '2780', '27925', '28192', '2877', '3001', '3002', '3003', '3004', '3005', '3008', '3009', '3010', '30136', '3020', '3021', '3022', '3023', '3024', '3031', '3032', '3034', '3035', '3037', '30374', '3039', '3040', '30413', '30414', '3062b', '3065', '3068b', '3069b', '3070b', '32000', '32013', '32028', '32054', '32062', '32064', '32073', '32123', '32140', '32184', '32278', '32316', '3245c', '32523', '32524', '32525', '32526', '32607', '32952', '33291', '33909', '34103', '3460', '35480', '3622', '3623', '3660', '3665', '3666', '3673', '3700', '37

In [None]:
def xml_to_yolo_label(xml_file, image_width, image_height, classes=None):
    """Convert one Pascal VOC-style XML annotation into YOLO label lines.

    Every ``<object>`` directly under the XML root contributes one line of the
    form ``"class_id x_center y_center width height"``, where the four box
    values are normalised by the image dimensions.

    Args:
        xml_file: Path (or binary file object) of the XML annotation to parse.
        image_width: Width of the matching image in pixels.
        image_height: Height of the matching image in pixels.
        classes: Optional ordered sequence of class names; a name's position is
            its class id. Defaults to the notebook-level ``class_names`` list.

    Returns:
        list[str]: One YOLO-formatted line per annotated object, in file order.

    Raises:
        ValueError: If an object's class name is not present in ``classes``.
    """
    if classes is None:
        classes = class_names

    # Build the name -> id lookup once per file instead of scanning the list
    # for every object; setdefault keeps the first index, like list.index().
    class_to_id = {}
    for idx, name in enumerate(classes):
        class_to_id.setdefault(name, idx)

    def _clamp(value, upper):
        # Keep pixel coordinates inside the image so normalised values stay in [0, 1].
        return min(max(value, 0.0), float(upper))

    root = ET.parse(xml_file).getroot()
    yolo_annotations = []

    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name not in class_to_id:
            raise ValueError(f"{class_name!r} is not in the class list")

        bndbox = obj.find('bndbox')
        # Order the corners so a swapped annotation cannot yield a negative size.
        xmin, xmax = sorted((float(bndbox.find('xmin').text), float(bndbox.find('xmax').text)))
        ymin, ymax = sorted((float(bndbox.find('ymin').text), float(bndbox.find('ymax').text)))

        xmin, xmax = _clamp(xmin, image_width), _clamp(xmax, image_width)
        ymin, ymax = _clamp(ymin, image_height), _clamp(ymax, image_height)

        # Convert to YOLO format (normalized center_x, center_y, width, height)
        x_center = ((xmin + xmax) / 2) / image_width
        y_center = ((ymin + ymax) / 2) / image_height
        width = (xmax - xmin) / image_width
        height = (ymax - ymin) / image_height

        yolo_annotations.append(
            f"{class_to_id[class_name]} {x_center} {y_center} {width} {height}"
        )

    return yolo_annotations

In [None]:
# Collect every PNG in the image directory; sorting first makes the seeded
# shuffle below independent of the order glob happens to return
image_files = glob.glob(os.path.join(IMAGE_DIR, "*.png"))
image_files.sort()
print(f"Found {len(image_files)} image files")

# Seeded in-place shuffle, then cut the list 80:20 into train / validation
random.seed(42)
random.shuffle(image_files)
split_idx = int(0.8 * len(image_files))
train_images, test_images = image_files[:split_idx], image_files[split_idx:]

print(f"Training images: {len(train_images)}")
print(f"Validation images: {len(test_images)}")

# Process the images and annotations
def process_dataset(image_files, dataset_type):
    """Copy annotated images into the YOLO dataset tree and write their label files.

    For each image whose XML annotation exists in ``XML_DIR``, the image is
    copied to ``OUTPUT_DIR/images/<dataset_type>/`` and its converted labels
    are written to ``OUTPUT_DIR/labels/<dataset_type>/<image name>.txt``.
    Images without a matching XML file are reported and skipped.

    Args:
        image_files: Iterable of image file paths to process.
        dataset_type: Name of the split sub-directory (e.g. "train" or "val").
    """
    for img_path in image_files:
        # Image filename with and without its extension
        img_filename = os.path.basename(img_path)
        img_name = os.path.splitext(img_filename)[0]

        # Check for the annotation first so unannotated images are never opened
        xml_path = os.path.join(XML_DIR, f"{img_name}.xml")
        if not os.path.exists(xml_path):
            print(f"Warning: XML file not found for {img_path}")
            continue

        # Only the dimensions are needed; the context manager closes the file
        # handle straight away instead of leaking one per image
        with Image.open(img_path) as img:
            img_width, img_height = img.size

        # Copy image to dataset directory
        output_img_path = os.path.join(OUTPUT_DIR, "images", dataset_type, img_filename)
        shutil.copy(img_path, output_img_path)

        # Convert annotations to YOLO labels
        yolo_annotations = xml_to_yolo_label(xml_path, img_width, img_height)

        # Save YOLO annotations, one object per line
        output_label_path = os.path.join(OUTPUT_DIR, "labels", dataset_type, f"{img_name}.txt")
        with open(output_label_path, "w") as f:
            f.write("\n".join(yolo_annotations))

# Materialise both splits on disk: training images first, then validation
for subset_images, subset_name in ((train_images, "train"), (test_images, "val")):
    process_dataset(subset_images, subset_name)

In [8]:
# Create dataset.yaml file
# JSON strings/arrays are valid YAML flow syntax, so json.dumps yields safely
# quoted values even when the path or a class name contains characters that are
# special in YAML (quotes, backslashes, '#', ': ').
dataset_root_yaml = json.dumps(os.path.abspath(OUTPUT_DIR), ensure_ascii=False)
class_names_yaml = json.dumps(class_names, ensure_ascii=False)

yaml_content = f"""
# YOLO dataset configuration
path: {dataset_root_yaml}
train: images/train
val: images/val

# Classes
nc: {len(class_names)}
names: {class_names_yaml}
"""

# Write UTF-8 explicitly so the file's encoding does not depend on the
# platform default (relevant for non-ASCII paths or class names)
with open(os.path.join(OUTPUT_DIR, "dataset.yaml"), "w", encoding="utf-8") as f:
    f.write(yaml_content)

print("Dataset preparation complete!")

Dataset preparation complete!


In [None]:
# Select base model
model = YOLO('yolov8m.pt')

# Choose the device from the same CUDA check used below, so the cell falls
# back to CPU instead of failing on a machine without a usable GPU
train_device = 'cuda' if cuda_available else 'cpu'

# Clear cache
if cuda_available:
    torch.cuda.empty_cache()

# Start training
results = model.train(
    data=os.path.join(OUTPUT_DIR, "dataset.yaml"),
    epochs=500,          # upper bound; `patience` normally ends the run earlier
    imgsz=1024,
    batch=-1,            # -1 asks Ultralytics to pick the batch size automatically
    patience=20,         # early-stopping window, in epochs without improvement
    device=train_device,
    project=OUTPUT_DIR,  # run folders are created inside the dataset directory
    name='lego_yolo8m'
)

New https://pypi.org/project/ultralytics/8.3.142 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.12.4 torch-2.7.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 SUPER, 12282MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=2, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=yolo_dataset\dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=500, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=lego_yolo8m5, nbs=64, nms

[34m[1mtrain: [0mScanning D:\GitHub\MainProject\yolo_dataset\labels\train... 1600 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1600/1600 [00:29<00:00, 53.53it/s]


[34m[1mtrain: [0mNew cache created: D:\GitHub\MainProject\yolo_dataset\labels\train.cache
[34m[1mval: [0mFast image access  (ping: 0.60.2 ms, read: 236.245.2 MB/s, size: 7508.7 KB)


[34m[1mval: [0mScanning D:\GitHub\MainProject\yolo_dataset\labels\val... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:06<00:00, 60.19it/s]

[34m[1mval: [0mNew cache created: D:\GitHub\MainProject\yolo_dataset\labels\val.cache





Plotting labels to yolo_dataset\lego_yolo8m5\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 8 dataloader workers
Logging results to [1myolo_dataset\lego_yolo8m5[0m
Starting training for 500 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/500      8.14G     0.8444      3.738     0.9136       1292       1024: 100%|██████████| 800/800 [01:40<00:00,  7.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:18<00:00,  5.48it/s]


                   all        400     160000       0.26      0.265      0.246      0.222

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/500      12.6G     0.7919      2.058     0.8663        859       1024: 100%|██████████| 800/800 [05:02<00:00,  2.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [01:01<00:00,  1.62it/s]


                   all        400     160000      0.372      0.335      0.344      0.301

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/500      10.4G      0.828       1.74     0.8707        371       1024: 100%|██████████| 800/800 [01:49<00:00,  7.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:16<00:00,  5.95it/s]


                   all        400     160000      0.528      0.416      0.476      0.405

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/500      8.32G     0.8362      1.529     0.8678        824       1024: 100%|██████████| 800/800 [01:36<00:00,  8.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:18<00:00,  5.41it/s]


                   all        400     160000      0.648      0.503      0.584      0.483

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/500      5.52G     0.8115      1.378      0.855        530       1024:   5%|▌         | 42/800 [00:05<01:32,  8.23it/s]

In [5]:
# Evaluate the trained model on the validation split of the prepared dataset
dataset_yaml_path = os.path.join(OUTPUT_DIR, "dataset.yaml")
metrics = model.val(data=dataset_yaml_path, split='val')

# Report the headline box-detection scores
box_metrics = metrics.box
for metric_label, metric_value in (
    ("mAP@50-95", box_metrics.map),
    ("mAP@50", box_metrics.map50),
    ("mAP@75", box_metrics.map75),
):
    print(f"{metric_label}: {metric_value:.3f}")

Ultralytics 8.3.137  Python-3.12.4 torch-2.7.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 SUPER, 12282MiB)
Model summary (fused): 92 layers, 25,955,560 parameters, 0 gradients, 79.3 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.1 ms, read: 2802.6329.8 MB/s, size: 7577.3 KB)


[34m[1mval: [0mScanning D:\GitHub\MainProject\dataset\labels\val.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 200/200 [01:00<00:00,  3.30it/s]


                   all        400     160000      0.888      0.681      0.795      0.725
                 10247        400        800      0.891      0.694      0.821       0.77
                 11090        400        800      0.941       0.58      0.763       0.63
                 11211        400        800      0.743      0.379      0.575      0.527
                 11212        400        800      0.996       0.97      0.985      0.962
                 11214        400        800      0.749      0.335      0.542      0.492
                 11458        400        800       0.91      0.542      0.741      0.674
                 11476        400        800       0.79      0.695      0.771      0.698
                 11477        400        800      0.749      0.321      0.542      0.483
                 14704        400        800      0.966       0.77      0.879      0.792
                 14719        400        800      0.943      0.889      0.931      0.866
                 1476