In [36]:
import os
import torch
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2
import glob
from IPython.display import display, Image
import numpy as np
import yaml
from PIL import Image as PILImage
import albumentations as A
import shutil
from pathlib import Path

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


In [37]:
main_path = 'dataset'
data_yaml_path = 'dataset/data.yaml'
class_names = ["ball", "goalkeeper", "player", "referee"]
work_dir = '.'
os.makedirs(work_dir, exist_ok=True)
with open(data_yaml_path, 'r') as file:
    data_cfg = yaml.safe_load(file)
    
print("Dataset configuration:")
print(data_cfg)

Dataset configuration:
{'train': '../train/images', 'val': '../valid/images', 'test': '../test/images', 'nc': 4, 'names': ['ball', 'goalkeeper', 'player', 'referee'], 'roboflow': {'workspace': 'roboflow-jvuqo', 'project': 'football-players-detection-3zvbc', 'version': 9, 'license': 'CC BY 4.0', 'url': 'https://universe.roboflow.com/roboflow-jvuqo/football-players-detection-3zvbc/dataset/9'}}


In [38]:
model_variant = 'yolov8m.pt'

In [39]:
IMG_SIZE = 1280 
BATCH_SIZE = 8   
EPOCHS = 50     
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0005
CONF_THRESHOLD = 0.25  
NMS_IOU_THRESHOLD = 0.5

In [40]:
custom_data_yaml = os.path.join(work_dir, 'custom_data.yaml')

with open(data_yaml_path, 'r') as infile, open(custom_data_yaml, 'w') as outfile:
    data = yaml.safe_load(infile)
    
    # Add class weights to emphasize rare classes
    data['class_weights'] = [5.0, 3.0, 1.0, 4.0]  # Increase weights for ball (0), goalkeeper (1), and referee (3)
    
    yaml.dump(data, outfile)


In [41]:
def create_augmented_dataset():
    """Create a copy and perform augmentation for rare classes (ball, goalkeeper, referee)."""

    # Create directory for augmented dataset
    aug_dir = os.path.join(work_dir, 'augmented_dataset')

    # Create YOLOv8-compliant directory structure
    train_images_dir = os.path.join(aug_dir, 'train', 'images')
    train_labels_dir = os.path.join(aug_dir, 'train', 'labels')
    val_images_dir = os.path.join(aug_dir, 'valid', 'images')  # Folder name must be "valid", not "val"
    val_labels_dir = os.path.join(aug_dir, 'valid', 'labels')

    # Create necessary folders
    os.makedirs(train_images_dir, exist_ok=True)
    os.makedirs(train_labels_dir, exist_ok=True)
    os.makedirs(val_images_dir, exist_ok=True)
    os.makedirs(val_labels_dir, exist_ok=True)

    # Copy original training data and apply augmentation
    train_img_dir = os.path.join(main_path, 'train', 'images')
    train_label_dir = os.path.join(main_path, 'train', 'labels')

    img_files = sorted(glob.glob(os.path.join(train_img_dir, '*.*')))
    print(f"Found {len(img_files)} original training images.")

    # Define augmentations
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.RandomScale(scale_limit=0.2, p=0.4),
        A.Blur(blur_limit=3, p=0.2),
        A.MotionBlur(blur_limit=3, p=0.2),  # Useful for fast-moving objects like balls
    ])

    # Copy and augment training files
    for img_path in img_files:
        img_name = os.path.basename(img_path)
        label_name = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(train_label_dir, label_name)

        # Copy original image and label
        shutil.copy(img_path, os.path.join(train_images_dir, img_name))
        shutil.copy(label_path, os.path.join(train_labels_dir, label_name))

        # Check if the label contains rare classes
        with open(label_path, 'r') as f:
            labels = f.readlines()

        has_rare_class = False
        for label in labels:
            class_id = int(label.split()[0])
            if class_id in [0, 1, 3]:  # ball, goalkeeper, referee
                has_rare_class = True
                break

        # If rare class is present, perform augmentation
        if has_rare_class:
            img = np.array(PILImage.open(img_path))

            # Create 3 augmented versions for each image with rare classes
            for i in range(3):
                augmented = transform(image=img)
                aug_img = augmented['image']

                # Save augmented image
                aug_img_name = f"aug_{i}_{img_name}"
                PILImage.fromarray(aug_img).save(os.path.join(train_images_dir, aug_img_name))

                # Copy label (assumes augmentations don't affect bounding boxes)
                aug_label_name = f"aug_{i}_{label_name}"
                shutil.copy(label_path, os.path.join(train_labels_dir, aug_label_name))

    # Copy original validation data
    val_img_dir = os.path.join(main_path, 'valid', 'images')
    val_label_dir = os.path.join(main_path, 'valid', 'labels')

    val_files = sorted(glob.glob(os.path.join(val_img_dir, '*.*')))
    print(f"Found {len(val_files)} original validation images.")

    # Copy all validation files
    for img_path in val_files:
        img_name = os.path.basename(img_path)
        label_name = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(val_label_dir, label_name)

        # Copy files
        shutil.copy(img_path, os.path.join(val_images_dir, img_name))
        if os.path.exists(label_path):  # Check if label exists
            shutil.copy(label_path, os.path.join(val_labels_dir, label_name))

    # Update data.yaml with new paths
    aug_data_yaml = os.path.join(aug_dir, 'data.yaml')
    with open(custom_data_yaml, 'r') as infile, open(aug_data_yaml, 'w') as outfile:
        data = yaml.safe_load(infile)
        data['train'] = train_images_dir       # Update training path
        data['val'] = val_images_dir           # Update validation path
        data['path'] = aug_dir                 # Update base path
        data['names'] = class_names            # Ensure correct class names
        data['nc'] = len(class_names)          # Number of classes
        yaml.dump(data, outfile)

    return aug_data_yaml


In [42]:
augmented_data_yaml = create_augmented_dataset()

Found 250 original training images.
Found 43 original validation images.


In [43]:
print("\nKiểm tra nội dung YAML:")
with open(augmented_data_yaml, 'r') as f:
    print(f.read())


Kiểm tra nội dung YAML:
class_weights:
- 5.0
- 3.0
- 1.0
- 4.0
names:
- ball
- goalkeeper
- player
- referee
nc: 4
path: ./augmented_dataset
roboflow:
  license: CC BY 4.0
  project: football-players-detection-3zvbc
  url: https://universe.roboflow.com/roboflow-jvuqo/football-players-detection-3zvbc/dataset/9
  version: 9
  workspace: roboflow-jvuqo
test: ../test/images
train: ./augmented_dataset/train/images
val: ./augmented_dataset/valid/images



In [44]:
model = YOLO(model_variant)
model.to(DEVICE)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_

In [47]:
results_train = model.train(
    data='custom_data.yaml',         # 数据集配置文件路径（含训练/验证图像和标签）
    epochs=EPOCHS,                    # 训练轮数
    imgsz=IMG_SIZE,                   # 输入图像尺寸
    batch=BATCH_SIZE,                 # 批次大小
    device=DEVICE,                    # 使用的设备（如 '0' 表示第一个GPU）
    
    weight_decay=WEIGHT_DECAY,        # 权重衰减（防止过拟合）
    lr0=LEARNING_RATE,                # 初始学习率
    lrf=0.01,                         # 最终学习率比例（learning rate factor）

    mosaic=1.0,                       # 启用 Mosaic 增强（默认已启用）
    mixup=0.2,                        # 启用 MixUp 增强（图像融合）
    copy_paste=0.3,                   # 启用 Copy-Paste 增强
    fliplr=0.5,                       # 随机水平翻转图像的概率
    scale=0.25,                       # 随机缩放图像比例
    degrees=5.0,                      # 随机旋转图像角度（适合体育类数据）

    exist_ok=True,                    # 如果保存目录存在则覆盖
    patience=15,                      # Early stopping 的容忍轮数（防止过拟合）
    save_period=5,                    # 每 5 个 epoch 保存一次权重文件
)

print("\n✅ 模型训练已完成！")

Ultralytics 8.3.166 🚀 Python-3.11.13 torch-2.7.1+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 7933MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.3, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=custom_data.yaml, degrees=5.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.2, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=15, perspective=0.0, plots=True,

[34m[1mtrain: [0mScanning /home/cacc/Repositories/ComputerVisionExamples/FootballPlayerDetection/dataset/train/labels.cache... 250 images, 0 backgrounds, 0 corrupt: 100%|██████████| 250/250 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 3589.7±2746.3 MB/s, size: 218.3 KB)


[34m[1mval: [0mScanning /home/cacc/Repositories/ComputerVisionExamples/FootballPlayerDetection/dataset/valid/labels.cache... 43 images, 0 backgrounds, 0 corrupt: 100%|██████████| 43/43 [00:00<?, ?it/s]


Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1280 train, 1280 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/32 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 114.00 MiB. GPU 0 has a total capacity of 7.75 GiB of which 136.31 MiB is free. Including non-PyTorch memory, this process has 6.67 GiB memory in use. Of the allocated memory 6.46 GiB is allocated by PyTorch, and 68.16 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
results_val = model.val(
    data=augmented_data_yaml,
    split='val',
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    conf=CONF_THRESHOLD,
    iou=NMS_IOU_THRESHOLD,
    device=DEVICE
)

print("\n--- Kết quả Đánh Giá Validation ---")
print(f"mAP50-95: {results_val.box.map:.4f}")
print(f"mAP50:    {results_val.box.map50:.4f}")
print("Chi tiết từng lớp:")
for i, class_name in enumerate(class_names):
    try:
        class_precision = results_val.box.p[i]
        class_recall = results_val.box.r[i]
        class_map50 = results_val.box.maps[i]
        print(f"{class_name}: Precision={class_precision:.4f}, Recall={class_recall:.4f}, mAP50={class_map50:.4f}")
    except:
        print(f"{class_name}: Không có dữ liệu")

In [None]:
try:
    results_test = model.val(
        data=augmented_data_yaml,
        split='test',
        imgsz=IMG_SIZE,
        batch=BATCH_SIZE,
        conf=CONF_THRESHOLD,
        iou=NMS_IOU_THRESHOLD,
        device=DEVICE
    )
    print("\n--- Kết quả Đánh Giá Test ---")
    print(f"mAP50-95 (Test): {results_test.box.map:.4f}")
    print(f"mAP50 (Test):    {results_test.box.map50:.4f}")
except Exception as e:
    print(f"Không thể đánh giá trên tập test. Lỗi: {e}")

In [None]:
def ensemble_predictions(image_path, models, conf_threshold=0.25, iou_threshold=0.5):
    """Kết hợp dự đoán từ nhiều mô hình"""
    all_boxes = []
    all_scores = []
    all_classes = []
    
    # Dự đoán với từng mô hình
    for model in models:
        results = model.predict(
            source=image_path,
            imgsz=IMG_SIZE,
            conf=conf_threshold,
            iou=iou_threshold,
            device=DEVICE,
            save=False
        )[0]
        
        # Lấy kết quả
        if results.boxes.xyxy.shape[0] > 0:
            all_boxes.extend(results.boxes.xyxy.cpu().numpy())
            all_scores.extend(results.boxes.conf.cpu().numpy())
            all_classes.extend(results.boxes.cls.cpu().numpy())
    
    return np.array(all_boxes), np.array(all_scores), np.array(all_classes)

In [None]:
best_model_path = os.path.join(results_train.save_dir, 'weights/best.pt')
last_model_path = os.path.join(results_train.save_dir, 'weights/last.pt')

models = []
if os.path.exists(best_model_path):
    print(f"Loading best model from: {best_model_path}")
    best_model = YOLO(best_model_path)
    best_model.to(DEVICE)
    models.append(best_model)

if os.path.exists(last_model_path):
    print(f"Loading last model from: {last_model_path}")
    last_model = YOLO(last_model_path)
    last_model.to(DEVICE)
    models.append(last_model)

# Nếu không tìm thấy model, sử dụng model hiện tại
if len(models) == 0:
    print("Không tìm thấy models đã lưu, sử dụng model hiện tại.")
    models = [model]

In [None]:
test_img_dir = os.path.join(main_path, 'test', 'images')
test_image_paths = sorted(glob.glob(os.path.join(test_img_dir, '*.*')))[:5]

if not test_image_paths:
    print(f"Không tìm thấy ảnh nào trong thư mục test: {test_img_dir}")
else:
    for img_path in test_image_paths:
        print(f"\nPredicting on: {img_path}")
        
        # Sử dụng kỹ thuật Test Time Augmentation (TTA) và Ensemble
        # TTA thực hiện dự đoán trên nhiều biến thể của cùng một ảnh và kết hợp kết quả
        results_pred = models[0].predict(
            source=img_path,
            imgsz=IMG_SIZE,
            conf=CONF_THRESHOLD,
            iou=NMS_IOU_THRESHOLD,
            device=DEVICE,
            augment=True,  # Kích hoạt TTA
            save=False
        )
        
        result = results_pred[0]
        
        img_orig = result.orig_img
        img_with_boxes = img_orig.copy()
        boxes = result.boxes
        
        class_counts = {name: 0 for name in class_names}
        
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            conf = box.conf[0].item()
            cls_id = int(box.cls[0].item())
            class_name = result.names[cls_id]
            class_counts[class_name] += 1
            
            # Màu khác nhau cho mỗi class
            if class_name == "ball":
                color = (255, 0, 0)  # Đỏ cho bóng
            elif class_name == "goalkeeper":
                color = (0, 255, 0)  # Xanh lá cho thủ môn
            elif class_name == "player":
                color = (0, 0, 255)  # Xanh dương cho cầu thủ
            else:  # referee
                color = (255, 255, 0)  # Vàng cho trọng tài
            
            cv2.rectangle(img_with_boxes, (x1, y1), (x2, y2), color, 2)
            
            label = f"{class_name}: {conf:.2f}"
            (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            cv2.rectangle(img_with_boxes, (x1, y1 - text_height - baseline), (x1 + text_width, y1), color, -1)
            cv2.putText(img_with_boxes, label, (x1, y1 - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        
        plt.figure(figsize=(15, 15))
        plt.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
        plt.title(f"Prediction: {os.path.basename(img_path)}\n{class_counts}")
        plt.axis('off')
        plt.show()