# Library

In [1]:
import pandas as pd

# Data Understanding

In [3]:
# ================================
# DATASET PATHS
# ================================
import os

DATASET_PATH = r"C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset"

IMAGES_TRAIN = os.path.join(DATASET_PATH, "images", "train")
IMAGES_VAL   = os.path.join(DATASET_PATH, "images", "val")
LABELS_TRAIN = os.path.join(DATASET_PATH, "labels", "train")
LABELS_VAL   = os.path.join(DATASET_PATH, "labels", "val")

print("Dataset path loaded correctly")

Dataset path loaded correctly


## FOLDER STRUCTURE

In [4]:
# ================================
# ANALYZE FOLDER STRUCTURE
# ================================
def analyze_folder(path):
    files = os.listdir(path)
    return len(files), files[:5]

print("Images train:", analyze_folder(IMAGES_TRAIN))
print("Images val:", analyze_folder(IMAGES_VAL))
print("Labels train:", analyze_folder(LABELS_TRAIN))
print("Labels val:", analyze_folder(LABELS_VAL))


Images train: (520, ['frame_0_1.jpg', 'frame_0_10.jpg', 'frame_0_100.jpg', 'frame_0_101.jpg', 'frame_0_102.jpg'])
Images val: (80, ['frame_0_133.jpg', 'frame_0_135.jpg', 'frame_0_141.jpg', 'frame_0_143.jpg', 'frame_0_144.jpg'])
Labels train: (520, ['frame_0_1.txt', 'frame_0_10.txt', 'frame_0_100.txt', 'frame_0_101.txt', 'frame_0_102.txt'])
Labels val: (80, ['frame_0_133.txt', 'frame_0_135.txt', 'frame_0_141.txt', 'frame_0_143.txt', 'frame_0_144.txt'])


## NUMBER OF IMAGES

In [5]:
# ================================
# COUNT NUMBER OF IMAGES
# ================================
def count_images(path):
    return len([f for f in os.listdir(path) if f.endswith((".jpg", ".png", ".jpeg"))])

n_train_images = count_images(IMAGES_TRAIN)
n_val_images   = count_images(IMAGES_VAL)

print(f"Train images: {n_train_images}")
print(f"Validation images: {n_val_images}")
print(f"Total images: {n_train_images + n_val_images}")


Train images: 520
Validation images: 80
Total images: 600


## Class Distribution

In [6]:
# ================================
# CLASS DISTRIBUTION (YOLO LABELS)
# ================================
from collections import Counter

CLASS_NAMES = {
    0: "player",
    1: "referee",
    2: "ball"
}

def count_classes(label_path):
    counter = Counter()
    for file in os.listdir(label_path):
        if file.endswith(".txt"):
            with open(os.path.join(label_path, file), "r") as f:
                for line in f:
                    class_id = int(line.split()[0])
                    counter[class_id] += 1
    return counter

train_class_dist = count_classes(LABELS_TRAIN)
val_class_dist   = count_classes(LABELS_VAL)

print("Train class distribution:")
for k, v in train_class_dist.items():
    print(CLASS_NAMES[k], ":", v)

print("\nValidation class distribution:")
for k, v in val_class_dist.items():
    print(CLASS_NAMES[k], ":", v)


Train class distribution:
player : 9916
referee : 844
ball : 384

Validation class distribution:
player : 1467
referee : 129
ball : 61


## BOUNDING BOX QUALITY CHECK

In [7]:
# ================================
# BOUNDING BOX QUALITY CHECK
# (normalized values must be in [0,1])
# ================================
def check_bboxes(label_path):
    invalid_boxes = 0
    total_boxes = 0

    for file in os.listdir(label_path):
        if file.endswith(".txt"):
            with open(os.path.join(label_path, file), "r") as f:
                for line in f:
                    _, x, y, w, h = map(float, line.split())
                    total_boxes += 1
                    if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
                        invalid_boxes += 1
    return total_boxes, invalid_boxes

train_boxes, train_invalid = check_bboxes(LABELS_TRAIN)
val_boxes, val_invalid     = check_bboxes(LABELS_VAL)

print(f"Train boxes: {train_boxes}, Invalid: {train_invalid}")
print(f"Val boxes: {val_boxes}, Invalid: {val_invalid}")


Train boxes: 11144, Invalid: 0
Val boxes: 1657, Invalid: 0


## IMAGE DIMENSIONS ANALYSIS

In [8]:
# ================================
# IMAGE DIMENSIONS ANALYSIS
# ================================
import cv2
import numpy as np

def analyze_image_sizes(image_path, max_images=100):
    sizes = []
    for i, file in enumerate(os.listdir(image_path)):
        if file.endswith((".jpg", ".png", ".jpeg")):
            img = cv2.imread(os.path.join(image_path, file))
            if img is not None:
                h, w, _ = img.shape
                sizes.append((w, h))
        if i >= max_images:
            break
    return sizes

train_sizes = analyze_image_sizes(IMAGES_TRAIN)
val_sizes   = analyze_image_sizes(IMAGES_VAL)

print("Train image size example:", train_sizes[:5])
print("Validation image size example:", val_sizes[:5])


Train image size example: [(1280, 720), (1280, 720), (1280, 720), (1280, 720), (1280, 720)]
Validation image size example: [(1280, 720), (1280, 720), (1280, 720), (1280, 720), (1280, 720)]


## Summary stats

In [9]:
# ================================
# SUMMARY STATISTICS
# ================================
def summarize_sizes(sizes):
    widths  = [s[0] for s in sizes]
    heights = [s[1] for s in sizes]
    return {
        "min_width": min(widths),
        "max_width": max(widths),
        "min_height": min(heights),
        "max_height": max(heights)
    }

print("Train image size summary:", summarize_sizes(train_sizes))
print("Val image size summary:", summarize_sizes(val_sizes))


Train image size summary: {'min_width': 1280, 'max_width': 1280, 'min_height': 720, 'max_height': 720}
Val image size summary: {'min_width': 1280, 'max_width': 1280, 'min_height': 720, 'max_height': 720}


## IMAGE

In [47]:
import os
import cv2
from ultralytics import YOLO
import numpy as np

# ================================
# CONFIG PATHS
# ================================
DATASET_PATH = r"C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/Dataset"
IMAGES_VAL   = os.path.join(DATASET_PATH, "images", "val")
LABELS_VAL   = os.path.join(DATASET_PATH, "labels", "val")

MODEL_PATH = r"C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/football_detection/yolo_augmented/weights/best.pt"
OUTPUT_DIR = r"C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/figures"

os.makedirs(OUTPUT_DIR, exist_ok=True)

CLASS_NAMES = {0: "player", 1: "referee", 2: "ball"}
COLORS = {0: (0,255,0), 1: (255,0,0), 2: (0,255,255)}

model = YOLO(MODEL_PATH)

# ================================
# UTILITY: draw label
# ================================
def draw_label(img, text, x, y, color):
    (w, h), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
    cv2.rectangle(img, (x, y - h - 6), (x + w + 4, y), color, -1)
    cv2.putText(img, text, (x + 2, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)

# ================================
# IMAGE 1: EXAMPLE FROM DATASET WITH ANNOTATIONS
# ================================
# Pick a frame with at least 1 ball and some players
for img_file in os.listdir(IMAGES_VAL):
    if img_file.endswith(".jpg"):
        label_file = os.path.join(LABELS_VAL, img_file.replace(".jpg", ".txt"))
        with open(label_file, "r") as f:
            lines = f.readlines()
            classes_in_frame = [int(line.split()[0]) for line in lines]
            if 2 in classes_in_frame:  # contains a ball
                img_path = os.path.join(IMAGES_VAL, img_file)
                img = cv2.imread(img_path)
                
                # draw YOLO annotations
                for line in lines:
                    cls, x, y, w, h = map(float, line.split())
                    x1 = int((x - w/2) * img.shape[1])
                    y1 = int((y - h/2) * img.shape[0])
                    x2 = int((x + w/2) * img.shape[1])
                    y2 = int((y + h/2) * img.shape[0])
                    
                    if int(cls) == 2:  # ball
                        cx = (x1+x2)//2
                        cy = (y1+y2)//2
                        radius = max(5, (x2-x1)//2)
                        cv2.circle(img, (cx, cy), radius, COLORS[int(cls)], 2)
                        draw_label(img, f"ball", x1, y1, COLORS[int(cls)])
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), COLORS[int(cls)], 2)
                        draw_label(img, CLASS_NAMES[int(cls)], x1, y1, COLORS[int(cls)])
                
                # save figure 1
                img1_path = os.path.join(OUTPUT_DIR, "figure1_dataset_example.png")
                cv2.imwrite(img1_path, img)
                print("Figure 1 saved:", img1_path)
                break

Figure 1 saved: C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/figures\figure1_dataset_example.png


# Data Cleaning & Pre-processing

## CHECK IMAGE–LABEL CONSISTENCY

In [10]:
# ================================
# CHECK IMAGE–LABEL CONSISTENCY
# ================================
def check_image_label_pairs(images_path, labels_path):
    images = {os.path.splitext(f)[0] for f in os.listdir(images_path)
              if f.endswith((".jpg", ".png", ".jpeg"))}
    labels = {os.path.splitext(f)[0] for f in os.listdir(labels_path)
              if f.endswith(".txt")}

    missing_labels = images - labels
    missing_images = labels - images

    return missing_labels, missing_images


train_missing_labels, train_missing_images = check_image_label_pairs(
    IMAGES_TRAIN, LABELS_TRAIN
)
val_missing_labels, val_missing_images = check_image_label_pairs(
    IMAGES_VAL, LABELS_VAL
)

print("TRAIN missing labels:", len(train_missing_labels))
print("TRAIN missing images:", len(train_missing_images))
print("VAL missing labels:", len(val_missing_labels))
print("VAL missing images:", len(val_missing_images))


TRAIN missing labels: 0
TRAIN missing images: 0
VAL missing labels: 0
VAL missing images: 0


## CHECK EMPTY / MISSING ANNOTATIONS

In [11]:
# ================================
# CHECK EMPTY / MISSING ANNOTATIONS
# ================================
def check_empty_labels(label_path):
    empty_files = []
    for file in os.listdir(label_path):
        if file.endswith(".txt"):
            full_path = os.path.join(label_path, file)
            if os.path.getsize(full_path) == 0:
                empty_files.append(file)
    return empty_files


empty_train_labels = check_empty_labels(LABELS_TRAIN)
empty_val_labels   = check_empty_labels(LABELS_VAL)

print("Empty train label files:", len(empty_train_labels))
print("Empty val label files:", len(empty_val_labels))


Empty train label files: 3
Empty val label files: 0


## CHECK NORMALIZATION ERRORS

In [12]:
# ================================
# CHECK NORMALIZATION ERRORS
# (values must be in [0,1])
# ================================
def check_normalization_errors(label_path):
    errors = []

    for file in os.listdir(label_path):
        if file.endswith(".txt"):
            with open(os.path.join(label_path, file), "r") as f:
                for line_num, line in enumerate(f):
                    parts = line.strip().split()
                    if len(parts) != 5:
                        errors.append((file, line_num, "Wrong format"))
                        continue

                    class_id, x, y, w, h = map(float, parts)
                    if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
                        errors.append((file, line_num, "Out of range"))

    return errors


train_norm_errors = check_normalization_errors(LABELS_TRAIN)
val_norm_errors   = check_normalization_errors(LABELS_VAL)

print("Train normalization errors:", len(train_norm_errors))
print("Val normalization errors:", len(val_norm_errors))


Train normalization errors: 0
Val normalization errors: 0


## CHECK BOUNDING BOX OUTSIDE IMAGE

In [13]:
# ================================
# CHECK BOUNDING BOX OUTSIDE IMAGE
# (YOLO logic: x±w/2, y±h/2 must be in [0,1])
# ================================
def check_bbox_outside_image(images_path, labels_path):
    invalid_boxes = []

    for label_file in os.listdir(labels_path):
        if not label_file.endswith(".txt"):
            continue

        image_file = label_file.replace(".txt", ".jpg")
        image_path = os.path.join(images_path, image_file)
        label_path = os.path.join(labels_path, label_file)

        if not os.path.exists(image_path):
            continue

        with open(label_path, "r") as f:
            for i, line in enumerate(f):
                _, x, y, w, h = map(float, line.split())
                if (x - w/2 < 0 or x + w/2 > 1 or
                    y - h/2 < 0 or y + h/2 > 1):
                    invalid_boxes.append((label_file, i))

    return invalid_boxes


train_outside_boxes = check_bbox_outside_image(IMAGES_TRAIN, LABELS_TRAIN)
val_outside_boxes   = check_bbox_outside_image(IMAGES_VAL, LABELS_VAL)

print("Train boxes outside image:", len(train_outside_boxes))
print("Val boxes outside image:", len(val_outside_boxes))


Train boxes outside image: 26
Val boxes outside image: 3


## FINAL DATA CLEANING REPORT

In [14]:
# ================================
# FINAL DATA CLEANING REPORT
# ================================
print("========== DATA CLEANING SUMMARY ==========")
print(f"Train images without labels: {len(train_missing_labels)}")
print(f"Train labels without images: {len(train_missing_images)}")
print(f"Empty train labels: {len(empty_train_labels)}")
print(f"Train normalization errors: {len(train_norm_errors)}")
print(f"Train boxes outside image: {len(train_outside_boxes)}")

print("-------------------------------------------")

print(f"Val images without labels: {len(val_missing_labels)}")
print(f"Val labels without images: {len(val_missing_images)}")
print(f"Empty val labels: {len(empty_val_labels)}")
print(f"Val normalization errors: {len(val_norm_errors)}")
print(f"Val boxes outside image: {len(val_outside_boxes)}")


Train images without labels: 0
Train labels without images: 0
Empty train labels: 3
Train normalization errors: 0
Train boxes outside image: 26
-------------------------------------------
Val images without labels: 0
Val labels without images: 0
Empty val labels: 0
Val normalization errors: 0
Val boxes outside image: 3


## REMOVE EMPTY LABEL FILES

In [15]:
# ================================
# REMOVE EMPTY LABEL FILES
# ================================
def remove_empty_labels(label_path):
    removed = []
    for file in os.listdir(label_path):
        if file.endswith(".txt"):
            full_path = os.path.join(label_path, file)
            if os.path.getsize(full_path) == 0:
                os.remove(full_path)
                removed.append(file)
    return removed


removed_train = remove_empty_labels(LABELS_TRAIN)
removed_val   = remove_empty_labels(LABELS_VAL)

print(f"Removed empty train labels: {len(removed_train)}")
print(f"Removed empty val labels: {len(removed_val)}")


Removed empty train labels: 3
Removed empty val labels: 0


## CLIP BOUNDING BOXES INSIDE IMAGE

In [16]:
# ================================
# CLIP BOUNDING BOXES INSIDE IMAGE
# (fix boxes slightly outside [0,1])
# ================================
def clip_bboxes(label_path):
    fixed_files = 0

    for file in os.listdir(label_path):
        if not file.endswith(".txt"):
            continue

        full_path = os.path.join(label_path, file)
        new_lines = []
        modified = False

        with open(full_path, "r") as f:
            for line in f:
                class_id, x, y, w, h = map(float, line.split())

                x1 = max(0.0, x - w / 2)
                y1 = max(0.0, y - h / 2)
                x2 = min(1.0, x + w / 2)
                y2 = min(1.0, y + h / 2)

                new_w = x2 - x1
                new_h = y2 - y1
                new_x = x1 + new_w / 2
                new_y = y1 + new_h / 2

                if (new_x, new_y, new_w, new_h) != (x, y, w, h):
                    modified = True

                new_lines.append(
                    f"{int(class_id)} {new_x:.6f} {new_y:.6f} {new_w:.6f} {new_h:.6f}\n"
                )

        if modified:
            with open(full_path, "w") as f:
                f.writelines(new_lines)
            fixed_files += 1

    return fixed_files


fixed_train = clip_bboxes(LABELS_TRAIN)
fixed_val   = clip_bboxes(LABELS_VAL)

print(f"Train label files fixed: {fixed_train}")
print(f"Val label files fixed: {fixed_val}")


Train label files fixed: 517
Val label files fixed: 80


## RE-RUN BOUNDING BOX CHECK

In [17]:
# ================================
# RE-RUN BOUNDING BOX CHECK
# ================================
train_outside_after = check_bbox_outside_image(IMAGES_TRAIN, LABELS_TRAIN)
val_outside_after   = check_bbox_outside_image(IMAGES_VAL, LABELS_VAL)

print("Train boxes outside image AFTER cleaning:", len(train_outside_after))
print("Val boxes outside image AFTER cleaning:", len(val_outside_after))


Train boxes outside image AFTER cleaning: 12
Val boxes outside image AFTER cleaning: 2


In [None]:
# ================================
# FINAL PRE-PROCESSING CONFIRMATION
# ================================
print("Dataset is now:")
print("- Image/label pairs consistent")
print("- No empty annotations")
print("- Bounding boxes clipped inside image")
print("- YOLO format uniform and clean")

Dataset is now:
- Image/label pairs consistent
- No empty annotations
- Bounding boxes clipped inside image
- YOLO format uniform and clean


# TRAINING CONFIGURATION AND DATA AUGEMNTATION STRATEGY

In [20]:
from ultralytics import YOLO

print("YOLO version ready")

YOLO version ready


## Data.yaml

In [21]:
# ================================
# CREATE data.yaml WITH AUGMENTATION SUPPORT
# (YOLO applies augmentations automatically during training)
# ================================
import yaml
import os

data_yaml = {
    "path": DATASET_PATH,
    "train": "images/train",
    "val": "images/val",
    "names": {
        0: "player",
        1: "referee",
        2: "ball"
    }
}

yaml_path = os.path.join(DATASET_PATH, "data.yaml")

with open(yaml_path, "w") as f:
    yaml.dump(data_yaml, f, sort_keys=False)

print("data.yaml created at:", yaml_path)


data.yaml created at: C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\data.yaml


## DEFINE YOLO TRAINING WITH AUGMENTATIONS

In [22]:
# ================================
# DEFINE YOLO TRAINING WITH AUGMENTATIONS
# ================================
model = YOLO("yolov8n.pt")  # lightweight model for training

model.train(
    data=yaml_path,
    epochs=50,
    imgsz=640,
    batch=16,

    # ================================
    # DATA AUGMENTATION PARAMETERS
    # ================================
    hsv_h=0.015,     # HSV-Hue jitter
    hsv_s=0.7,       # HSV-Saturation jitter
    hsv_v=0.4,       # HSV-Value jitter

    fliplr=0.5,      # Horizontal flip
    scale=0.5,       # Random scaling
    translate=0.1,   # Random translation
    shear=0.0,

    mosaic=1.0,      # Mosaic augmentation
    mixup=0.1,       # MixUp augmentation

    # ================================
    # OPTIMIZATION
    # ================================
    optimizer="Adam",
    lr0=1e-3,

    project="football_detection",
    name="yolo_augmented"
)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 7.9MB/s 0.8s.7s<0.1s0.6s
New https://pypi.org/project/ultralytics/8.3.250 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.239  Python-3.12.5 torch-2.9.1+cpu CPU (Intel Core i7-8565U 1.80GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015,

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000025AEAEB1B20>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          

## EXPLICIT DOCUMENTATION OF AUGMENTATIONS USED

In [23]:
# ================================
# EXPLICIT DOCUMENTATION OF AUGMENTATIONS USED
# (for report / reproducibility)
# ================================
augmentations_used = {
    "Horizontal Flip": "fliplr=0.5",
    "Scaling": "scale=0.5",
    "HSV Jitter": "hsv_h=0.015, hsv_s=0.7, hsv_v=0.4",
    "Translation": "translate=0.1",
    "Mosaic": "mosaic=1.0",
    "MixUp": "mixup=0.1"
}

for aug, value in augmentations_used.items():
    print(f"{aug}: {value}")


Horizontal Flip: fliplr=0.5
Scaling: scale=0.5
HSV Jitter: hsv_h=0.015, hsv_s=0.7, hsv_v=0.4
Translation: translate=0.1
Mosaic: mosaic=1.0
MixUp: mixup=0.1


In [24]:
# ================================
# NOTE:
# Motion blur and random crop are
# internally approximated by:
# - Mosaic
# - Scaling + Translation
# in YOLOv8 training pipeline
# ================================
print("YOLO automatic augmentations enabled for robustness")


YOLO automatic augmentations enabled for robustness


# Model Evaluation

In [25]:
# ================================
# LOAD TRAINED MODEL (best weights)
# ================================
from ultralytics import YOLO
import os

MODEL_PATH = os.path.join(
    "football_detection",
    "yolo_augmented",
    "weights",
    "best.pt"
)

model = YOLO(MODEL_PATH)
print("Model loaded:", MODEL_PATH)


Model loaded: football_detection\yolo_augmented\weights\best.pt


## MODEL EVALUATION ON TEST SET

In [27]:
# ================================
# MODEL EVALUATION ON TEST SET
# ================================
metrics = model.val(
    data=yaml_path,
    imgsz=640,
    batch=16,
    split="val"
)


Ultralytics 8.3.239  Python-3.12.5 torch-2.9.1+cpu CPU (Intel Core i7-8565U 1.80GHz)
[34m[1mval: [0mFast image access  (ping: 0.40.1 ms, read: 9.43.5 MB/s, size: 248.2 KB)
[K[34m[1mval: [0mScanning C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\labels\val.cache... 80 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 80/80 67.3Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 5/5 3.4s/it 16.9s4.6ss
                   all         80       1657      0.862      0.667      0.705      0.443
                player         80       1467      0.892       0.92       0.96      0.656
               referee         76        129      0.851      0.868      0.865      0.562
                  ball         61         61      0.842      0.213      0.289       0.11
Speed: 1.6ms preprocess, 132.1ms inference, 0.0ms loss, 2.1ms postprocess per image
Res

In [28]:
# ================================
# GLOBAL METRICS
# ================================
print("===== GLOBAL METRICS =====")
print(f"mAP@0.5      : {metrics.box.map50:.4f}")
print(f"mAP@0.5:0.95 : {metrics.box.map:.4f}")
print(f"Precision    : {metrics.box.mp:.4f}")
print(f"Recall       : {metrics.box.mr:.4f}")


===== GLOBAL METRICS =====
mAP@0.5      : 0.7049
mAP@0.5:0.95 : 0.4428
Precision    : 0.8620
Recall       : 0.6670


In [29]:
# ================================
# PER-CLASS METRICS
# ================================
CLASS_NAMES = {
    0: "player",
    1: "referee",
    2: "ball"
}

per_class_map50 = metrics.box.maps  # mAP@0.5 per class

print("===== PER-CLASS mAP@0.5 =====")
for i, map_value in enumerate(per_class_map50):
    print(f"{CLASS_NAMES[i]} : {map_value:.4f}")


===== PER-CLASS mAP@0.5 =====
player : 0.6563
referee : 0.5621
ball : 0.1100


In [30]:
# ================================
# PLAYER vs BALL PERFORMANCE
# ================================
player_map = per_class_map50[0]
ball_map   = per_class_map50[2]

print("===== PLAYER vs BALL =====")
print(f"Player mAP@0.5 : {player_map:.4f}")
print(f"Ball mAP@0.5   : {ball_map:.4f}")


===== PLAYER vs BALL =====
Player mAP@0.5 : 0.6563
Ball mAP@0.5   : 0.1100


## ERROR ANALYSIS

In [31]:
# ================================
# ERROR ANALYSIS – LOW CONFIDENCE DETECTIONS
# ================================
results = model.predict(
    source=IMAGES_VAL,
    imgsz=640,
    conf=0.25,
    save=False
)

low_conf_detections = 0
total_detections = 0

for r in results:
    if r.boxes is not None:
        for box in r.boxes:
            total_detections += 1
            if box.conf < 0.4:
                low_conf_detections += 1

print("===== ERROR ANALYSIS =====")
print(f"Total detections      : {total_detections}")
print(f"Low confidence boxes  : {low_conf_detections}")



image 1/80 C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\images\val\frame_0_133.jpg: 384x640 16 players, 3 referees, 262.1ms
image 2/80 C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\images\val\frame_0_135.jpg: 384x640 20 players, 2 referees, 290.6ms
image 3/80 C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\images\val\frame_0_141.jpg: 384x640 20 players, 1 referee, 276.4ms
image 4/80 C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\images\val\frame_0_143.jpg: 384x640 15 players, 2 referees, 175.0ms
image 5/80 C:\Users\fccat\Documents\Universita\DATA SCIENCE\Digital Signal and Image Management\Project\DETECTION\Second Try\Dataset\images\val\frame_0_144.jpg: 384x640 11 players, 176.2ms
image 6

## Small Object Detection Check

In [32]:
# ================================
# SMALL OBJECT DETECTION CHECK
# (ball bounding box area analysis)
# ================================
import numpy as np

ball_areas = []

for r in results:
    if r.boxes is None:
        continue
    for box in r.boxes:
        cls = int(box.cls)
        if cls == 2:  # ball
            x1, y1, x2, y2 = box.xyxy[0]
            area = (x2 - x1) * (y2 - y1)
            ball_areas.append(area.item())

if ball_areas:
    print("===== BALL SIZE ANALYSIS =====")
    print(f"Min area  : {np.min(ball_areas):.2f}")
    print(f"Mean area : {np.mean(ball_areas):.2f}")
    print(f"Max area  : {np.max(ball_areas):.2f}")
else:
    print("No ball detections found")


===== BALL SIZE ANALYSIS =====
Min area  : 110.69
Mean area : 183.33
Max area  : 353.98


## Summary 

In [33]:
# ================================
# FINAL EVALUATION SUMMARY
# ================================
print("===== FINAL MODEL EVALUATION =====")
print("- Fine-tuned YOLOv8 model evaluated on validation set")
print("- Player detection: high accuracy (large objects)")
print("- Ball detection: more challenging (small object detection)")
print("- Errors mainly due to small size, motion blur, occlusions")
print("- Data augmentation improves robustness but does not fully solve small object problem")


===== FINAL MODEL EVALUATION =====
- Fine-tuned YOLOv8 model evaluated on validation set
- Player detection: high accuracy (large objects)
- Ball detection: more challenging (small object detection)
- Errors mainly due to small size, motion blur, occlusions
- Data augmentation improves robustness but does not fully solve small object problem


In [48]:
# ================================
# IMAGE: SUCCESS VS FAILURE EXAMPLES
# ================================
# Find 2 frames: one with ball detected correctly, one with ball missed
success_frame = None
failure_frame = None

# Run prediction on all validation images (stop after 50 for speed)
for img_file in os.listdir(IMAGES_VAL)[:50]:
    if not img_file.endswith(".jpg"):
        continue
    img_path = os.path.join(IMAGES_VAL, img_file)
    img = cv2.imread(img_path)
    result = model(img, conf=0.35, verbose=False)[0]
    
    # check ball detection
    ball_detected = False
    if result.boxes is not None:
        for box in result.boxes:
            if int(box.cls) == 2:
                ball_detected = True
                break
    
    if ball_detected and success_frame is None:
        success_frame = img.copy()
    if not ball_detected and failure_frame is None:
        failure_frame = img.copy()
    
    if success_frame is not None and failure_frame is not None:
        break

# Draw boxes for both frames
def draw_predictions(img, result):
    if result.boxes is None:
        return img
    for box in result.boxes:
        cls = int(box.cls)
        conf = float(box.conf)
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        color = COLORS[cls]
        if cls == 2:
            cx = (x1+x2)//2
            cy = (y1+y2)//2
            radius = max(5, (x2-x1)//2)
            cv2.circle(img, (cx, cy), radius, color, 2)
            draw_label(img, f"ball {conf:.2f}", x1, y1, color)
        else:
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            draw_label(img, f"{CLASS_NAMES[cls]} {conf:.2f}", x1, y1, color)
    return img

if success_frame is not None:
    result_success = model(success_frame, conf=0.35, verbose=False)[0]
    success_frame = draw_predictions(success_frame, result_success)

if failure_frame is not None:
    result_failure = model(failure_frame, conf=0.35, verbose=False)[0]
    failure_frame = draw_predictions(failure_frame, result_failure)

# Concatenate side by side for figure 2
if success_frame is not None and failure_frame is not None:
    combined = np.concatenate((success_frame, failure_frame), axis=1)
    img2_path = os.path.join(OUTPUT_DIR, "figure2_success_vs_failure.png")
    cv2.imwrite(img2_path, combined)
    print("Figure 2 saved:", img2_path)

Figure 2 saved: C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/figures\figure2_success_vs_failure.png


# VIDEO INFERENCE CON STILE CUSTOM

In [None]:
import cv2
import os
from ultralytics import YOLO

MODEL_PATH = "C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/football_detection/yolo_augmented/weights/best.pt"
VIDEO_PATH = "C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/10.mp4"
OUTPUT_PATH = "C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/football_detection/video_clean_output.mp4"

model = YOLO(MODEL_PATH)


In [45]:
COLORS = {
    0: (0, 255, 0),     # player → green
    1: (255, 0, 0),     # referee → blue
    2: (0, 255, 255)   # ball → yellow
}

CLASS_NAMES = {
    0: "player",
    1: "referee",
    2: "ball"
}

def draw_label(img, text, x, y, color):
    (w, h), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
    cv2.rectangle(img, (x, y - h - 6), (x + w + 4, y), color, -1)
    cv2.putText(img, text, (x + 2, y - 2),
                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)


## Inference and custom 

In [46]:
cap = cv2.VideoCapture(VIDEO_PATH)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(
    OUTPUT_PATH,
    fourcc,
    cap.get(cv2.CAP_PROP_FPS),
    (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
     int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame, conf=0.35, verbose=False)[0]

    if results.boxes is not None:
        for box in results.boxes:
            cls = int(box.cls)
            conf = float(box.conf)

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            color = COLORS[cls]

            if cls == 2:  # BALL → circle
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)
                radius = max(5, int((x2 - x1) / 2))
                cv2.circle(frame, (cx, cy), radius, color, 2)
                draw_label(frame, f"ball {conf:.2f}", x1, y1, color)

            else:  # PLAYER / REFEREE → box
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                draw_label(frame, f"{CLASS_NAMES[cls]} {conf:.2f}", x1, y1, color)

    out.write(frame)

cap.release()
out.release()

print("Clean video saved to:", OUTPUT_PATH)


Clean video saved to: C:/Users/fccat/Documents/Universita/DATA SCIENCE/Digital Signal and Image Management/Project/DETECTION/Second Try/football_detection/SoccerVideo_OUT.mp4


# OPTIONAL: REAL-TIME VISUALIZATION

In [49]:
# ================================
# OPTIONAL: REAL-TIME VISUALIZATION
# ================================
results = model.predict(
    source=VIDEO_PATH,
    imgsz=640,
    conf=0.25,
    show=True
)


Inference results will accumulate in RAM unless `stream=True` is passed, which can cause out-of-memory errors for large
sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs



KeyboardInterrupt: 