In [1]:
import os 
import shutil
from ultralytics import YOLO
import torch

## Prepare Paths

In [2]:
NOTEBOOKS_PATH = os.getcwd()
print(NOTEBOOKS_PATH)

/home/gomaa/Ai/projects/Football-Match-Analysis/notebooks


In [3]:
PROJECT_PATH = os.path.dirname(NOTEBOOKS_PATH)
print(PROJECT_PATH)

/home/gomaa/Ai/projects/Football-Match-Analysis


In [4]:
DATA_PATH = os.path.join(PROJECT_PATH, 'data')
MODELS_PATH = os.path.join(PROJECT_PATH, 'models')
print(f"Data Path: {DATA_PATH}")
print(f"Models Path: {MODELS_PATH}")


Data Path: /home/gomaa/Ai/projects/Football-Match-Analysis/data
Models Path: /home/gomaa/Ai/projects/Football-Match-Analysis/models


In [5]:
dataset_dir_name = 'football-players-detection-dataset'
DATASET_PATH = os.path.join(DATA_PATH, dataset_dir_name)
print(f"Dataset Path: {DATASET_PATH}")

Dataset Path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset


In [6]:
DATA_YAML_PATH = os.path.join(DATASET_PATH, 'data.yaml')
print(DATA_YAML_PATH)

/home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/data.yaml


In [7]:
TRAIN_IMGS_PATH = os.path.join(DATASET_PATH, 'train/images')
TRAIN_LBLS_PATH = os.path.join(DATASET_PATH, 'train/labels')

VALID_IMGS_PATH = os.path.join(DATASET_PATH, 'valid/images')
VALID_LBLS_PATH = os.path.join(DATASET_PATH, 'valid/labels')

TEST_IMGS_PATH = os.path.join(DATASET_PATH, 'test/images')
TEST_LBLS_PATH = os.path.join(DATASET_PATH, 'test/labels')

print(f"Train images path: {TRAIN_IMGS_PATH}")
print(f"Train labels path: {TRAIN_LBLS_PATH}")
print(f"Valid images path: {VALID_IMGS_PATH}")
print(f"Valid labels path: {VALID_LBLS_PATH}")
print(f"Test images path: {TEST_IMGS_PATH}")
print(f"Test labels path: {TEST_LBLS_PATH}")

Train images path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/train/images
Train labels path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/train/labels
Valid images path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/valid/images
Valid labels path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/valid/labels
Test images path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images
Test labels path: /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/labels


# Train YOLOv8 Model

In [8]:
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())
print("Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

CUDA available: True
GPU count: 1
Device: NVIDIA GeForce RTX 2050


In [9]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

cuda


In [None]:
model = YOLO("yolov8m.pt")

In [None]:
EPOCHS = 50
IMG_SIZE = 1024
BATCH_SIZE = 4

In [None]:
model.train(
    data=DATA_YAML_PATH,
    epochs=EPOCHS,
    imgsz = IMG_SIZE,
    batch = BATCH_SIZE,
    project = MODELS_PATH,
    plots=True
)

### Test Model

In [10]:
TRAINED_MODEL_PATH = os.path.join(MODELS_PATH, 'train5/weights/best.pt')
print(f"Trained Model path: {TRAINED_MODEL_PATH}")

Trained Model path: /home/gomaa/Ai/projects/Football-Match-Analysis/models/train5/weights/best.pt


In [11]:
trained_model = YOLO(TRAINED_MODEL_PATH)

In [None]:
results = trained_model.val(data=DATA_YAML_PATH,split='val',imgsz=1024)

In [None]:
results = trained_model.val(data=DATA_YAML_PATH,split='test',imgsz=1024)

In [16]:
RESULTS_PATH = os.path.join(PROJECT_PATH,'runs')
print(f"Results path: {RESULTS_PATH}")

Results path: /home/gomaa/Ai/projects/Football-Match-Analysis/runs


In [20]:
trained_model.predict(source=TEST_IMGS_PATH,
              conf=0.4,
              save=True,
              classes = [0,1,2,3],
              project = RESULTS_PATH)


image 1/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/08fd33_3_6_png.rf.261781c58b95436fb40e6afc0495bc57.jpg: 1024x1024 1 ball, 20 players, 2 referees, 156.4ms
image 2/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/08fd33_9_3_png.rf.20ff5bcc914d7f607cf9425c73da679c.jpg: 1024x1024 20 players, 1 referee, 151.3ms
image 3/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/40cd38_7_6_png.rf.3e49856b19208cd7b616cd826a793f08.jpg: 1024x1024 21 players, 1 referee, 151.7ms
image 4/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/42ba34_1_5_png.rf.e530a34d1588130dae6f6cba815688ce.jpg: 1024x1024 1 ball, 1 goalkeeper, 20 players, 2 referees, 151.9ms
image 5/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/42ba34_5_5_png.rf.4153d2

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
 obb: None
 orig_img: array([[[255, 253, 249],
         [255, 254, 250],
         [254, 255, 251],
         ...,
         [106,  94,  76],
         [122, 110,  92],
         [113, 101,  83]],
 
        [[255, 254, 250],
         [255, 254, 250],
         [254, 255, 251],
         ...,
         [106,  94,  76],
         [128, 116,  98],
         [116, 104,  86]],
 
        [[255, 254, 250],
         [253, 254, 250],
         [252, 255, 251],
         ...,
         [140, 130, 113],
         [136, 126, 109],
         [ 94,  84,  67]],
 
        ...,
 
        [[145, 147, 117],
         [139, 141, 112],
         [147, 144, 116],
         ...,
         [ 55,  53,  45],
         [105, 103,  95],
         [102, 100,  92]],
 
        [[146, 148, 119],
         [140, 142, 113],
         [146, 1

In [21]:
# Show balls only
trained_model.predict(source=TEST_IMGS_PATH,
              conf=0.4,
              save=True,
              classes = [0],
              project = RESULTS_PATH)


image 1/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/08fd33_3_6_png.rf.261781c58b95436fb40e6afc0495bc57.jpg: 1024x1024 1 ball, 157.7ms
image 2/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/08fd33_9_3_png.rf.20ff5bcc914d7f607cf9425c73da679c.jpg: 1024x1024 (no detections), 155.8ms
image 3/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/40cd38_7_6_png.rf.3e49856b19208cd7b616cd826a793f08.jpg: 1024x1024 (no detections), 151.4ms
image 4/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/42ba34_1_5_png.rf.e530a34d1588130dae6f6cba815688ce.jpg: 1024x1024 1 ball, 151.6ms
image 5/25 /home/gomaa/Ai/projects/Football-Match-Analysis/data/football-players-detection-dataset/test/images/42ba34_5_5_png.rf.4153d27549bb9aad06d11a54c1ea3dc3.jpg: 1024x1024 (no detections), 153.0ms
image 6

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
 obb: None
 orig_img: array([[[255, 253, 249],
         [255, 254, 250],
         [254, 255, 251],
         ...,
         [106,  94,  76],
         [122, 110,  92],
         [113, 101,  83]],
 
        [[255, 254, 250],
         [255, 254, 250],
         [254, 255, 251],
         ...,
         [106,  94,  76],
         [128, 116,  98],
         [116, 104,  86]],
 
        [[255, 254, 250],
         [253, 254, 250],
         [252, 255, 251],
         ...,
         [140, 130, 113],
         [136, 126, 109],
         [ 94,  84,  67]],
 
        ...,
 
        [[145, 147, 117],
         [139, 141, 112],
         [147, 144, 116],
         ...,
         [ 55,  53,  45],
         [105, 103,  95],
         [102, 100,  92]],
 
        [[146, 148, 119],
         [140, 142, 113],
         [146, 1

# DATASET ANALYSIS

In [None]:
import os
import yaml
import cv2
import numpy as np
from pathlib import Path
from collections import defaultdict, Counter
import random

def analyze_yolo_dataset(dataset_path, show_prints=True):
    """
    Analyze a YOLO format dataset and provide comprehensive insights.
    
    Args:
        dataset_path (str): Path to the dataset root directory containing data.yaml
        show_prints (bool): Whether to print statistics to console
    
    Returns:
        dict: Dictionary containing all computed statistics
    """
    
    dataset_path = Path(dataset_path)
    stats = {}
    
    # Load data.yaml
    yaml_path = dataset_path / 'data.yaml'
    if not yaml_path.exists():
        raise FileNotFoundError(f"data.yaml not found at {yaml_path}")
    
    with open(yaml_path, 'r') as f:
        data_config = yaml.safe_load(f)
    
    # Extract class information
    class_names = data_config.get('names', [])
    num_classes = data_config.get('nc', len(class_names))
    stats['classes'] = class_names
    stats['num_classes'] = num_classes
    
    if show_prints:
        print("=" * 60)
        print("YOLO DATASET ANALYSIS")
        print("=" * 60)
        print(f"\nClasses ({num_classes}):")
        for idx, name in enumerate(class_names):
            print(f"  {idx}: {name}")
    
    # Analyze each split
    splits = ['train', 'val', 'test']
    split_stats = {}
    all_class_counts = Counter()
    all_image_sizes = []
    
    for split in splits:
        if split not in data_config:
            continue
            
        split_path_str = data_config[split]
        
        # Handle both absolute and relative paths
        if os.path.isabs(split_path_str):
            split_path = Path(split_path_str)
        else:
            split_path = dataset_path / split_path_str
        
        # Determine images and labels paths - try multiple common structures
        images_path = None
        labels_path = None
        
        # Structure 1: split_path directly points to images folder
        if split_path.exists() and split_path.name == 'images':
            images_path = split_path
            labels_path = split_path.parent / 'labels'
        # Structure 2: split_path contains 'images' in the path
        elif 'images' in str(split_path):
            images_path = split_path
            labels_path = Path(str(split_path).replace('images', 'labels'))
        # Structure 3: split_path/images and split_path/labels
        elif (split_path / 'images').exists():
            images_path = split_path / 'images'
            labels_path = split_path / 'labels'
        # Structure 4: dataset_path/images/split and dataset_path/labels/split
        elif (dataset_path / 'images' / split).exists():
            images_path = dataset_path / 'images' / split
            labels_path = dataset_path / 'labels' / split
        else:
            # Default assumption
            images_path = split_path
            labels_path = split_path.parent / 'labels' if split_path.parent.name == 'images' else split_path / 'labels'
        
        if not images_path.exists():
            continue
        
        # Get image files
        image_files = list(images_path.glob('*.jpg')) + list(images_path.glob('*.png')) + \
                     list(images_path.glob('*.jpeg')) + list(images_path.glob('*.JPG')) + \
                     list(images_path.glob('*.PNG')) + list(images_path.glob('*.JPEG'))
        
        num_images = len(image_files)
        split_stats[split] = {
            'num_images': num_images,
            'image_sizes': [],
            'class_counts': Counter(),
            'images_path': images_path,
            'labels_path': labels_path
        }
        
        # Analyze images and labels
        for img_file in image_files:
            # Get image size
            img = cv2.imread(str(img_file))
            if img is not None:
                h, w = img.shape[:2]
                split_stats[split]['image_sizes'].append((w, h))
                all_image_sizes.append((w, h))
            
            # Count classes in labels - try different label file locations
            label_file = labels_path / f"{img_file.stem}.txt"
            
            # If label file doesn't exist in expected location, try same directory
            if not label_file.exists():
                label_file = img_file.parent.parent / 'labels' / img_file.parent.name / f"{img_file.stem}.txt"
            
            if not label_file.exists():
                # Try parallel labels directory
                label_file = img_file.parent.parent / 'labels' / f"{img_file.stem}.txt"
            
            if label_file.exists():
                try:
                    with open(label_file, 'r') as f:
                        for line in f:
                            line = line.strip()
                            if line:  # Skip empty lines
                                parts = line.split()
                                if parts:
                                    try:
                                        class_id = int(parts[0])
                                        if 0 <= class_id < num_classes:
                                            split_stats[split]['class_counts'][class_id] += 1
                                            all_class_counts[class_id] += 1
                                    except (ValueError, IndexError):
                                        continue
                except Exception as e:
                    if show_prints:
                        print(f"Warning: Could not read label file {label_file}: {e}")
    
    stats['splits'] = split_stats
    stats['total_images'] = sum(s['num_images'] for s in split_stats.values())
    stats['all_class_counts'] = all_class_counts
    
    # Print statistics
    if show_prints:
        print(f"\n{'=' * 60}")
        print("DATASET DISTRIBUTION")
        print("=" * 60)
        print(f"\nTotal Images: {stats['total_images']}")
        
        for split in splits:
            if split in split_stats:
                print(f"  {split.capitalize()}: {split_stats[split]['num_images']} images")
        
        print(f"\n{'=' * 60}")
        print("IMAGE SIZES")
        print("=" * 60)
        
        for split in splits:
            if split in split_stats and split_stats[split]['image_sizes']:
                sizes = split_stats[split]['image_sizes']
                unique_sizes = set(sizes)
                print(f"\n{split.capitalize()}:")
                print(f"  Unique sizes: {len(unique_sizes)}")
                if len(unique_sizes) <= 5:
                    for size in unique_sizes:
                        count = sizes.count(size)
                        print(f"    {size[0]}x{size[1]}: {count} images")
                else:
                    size_counter = Counter(sizes)
                    print(f"  Most common sizes:")
                    for size, count in size_counter.most_common(3):
                        print(f"    {size[0]}x{size[1]}: {count} images")
        
        print(f"\n{'=' * 60}")
        print("CLASS DISTRIBUTION")
        print("=" * 60)
        print(f"\nTotal Instances per Class:")
        
        for class_id in range(num_classes):
            class_name = class_names[class_id] if class_id < len(class_names) else f"Class_{class_id}"
            total_count = all_class_counts.get(class_id, 0)
            print(f"  {class_name}: {total_count}")
            
            split_details = []
            for split in splits:
                if split in split_stats:
                    count = split_stats[split]['class_counts'].get(class_id, 0)
                    if count > 0:
                        split_details.append(f"{split}={count}")
            
            if split_details:
                print(f"    ({', '.join(split_details)})")
    
    
    return stats

In [37]:
stats = analyze_yolo_dataset(
    dataset_path=DATASET_PATH,
    show_prints=True
)

YOLO DATASET ANALYSIS

Classes (4):
  0: ball
  1: goalkeeper
  2: player
  3: referee

DATASET DISTRIBUTION

Total Images: 372
  Train: 298 images
  Val: 49 images
  Test: 25 images

IMAGE SIZES

Train:
  Unique sizes: 1
    576x576: 298 images

Val:
  Unique sizes: 1
    576x576: 49 images

Test:
  Unique sizes: 1
    576x576: 25 images

CLASS DISTRIBUTION

Total Instances per Class:
  ball: 327
    (train=258, val=45, test=24)
  goalkeeper: 288
    (train=230, val=39, test=19)
  player: 7427
    (train=5955, val=973, test=499)
  referee: 863
    (train=690, val=117, test=56)


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pathlib import Path
import random
import yaml


def plot_yolo_samples(dataset_path, stats, num_samples=6, splits=['train', 'val', 'test']):
    """
    Plot random sample images with labels from YOLO dataset splits.
    
    Args:
        dataset_path (str): Path to the dataset root directory containing data.yaml
        stats (dict): Statistics dictionary returned from analyze_yolo_dataset()
        num_samples (int): Number of random images to plot per split
        splits (list): List of splits to plot (e.g., ['train', 'val', 'test'])
    """
    
    dataset_path = Path(dataset_path)
    
    # Load data.yaml to get class names and paths
    yaml_path = dataset_path / 'data.yaml'
    if not yaml_path.exists():
        raise FileNotFoundError(f"data.yaml not found at {yaml_path}")
    
    with open(yaml_path, 'r') as f:
        data_config = yaml.safe_load(f)
    
    class_names = stats.get('classes', [])
    
    # Plot samples for each split
    for split in splits:
        if split not in stats['splits'] or stats['splits'][split]['num_images'] == 0:
            print(f"Skipping {split} - no images found")
            continue
        
        images_path = stats['splits'][split]['images_path']
        labels_path = stats['splits'][split]['labels_path']
        
        # Get all image files
        image_files = list(images_path.glob('*.jpg')) + list(images_path.glob('*.png')) + \
                     list(images_path.glob('*.jpeg')) + list(images_path.glob('*.JPG')) + \
                     list(images_path.glob('*.PNG')) + list(images_path.glob('*.JPEG'))
        
        if len(image_files) == 0:
            print(f"No image files found in {images_path}")
            continue
        
        # Select random samples
        sample_size = min(num_samples, len(image_files))
        sampled_images = random.sample(image_files, sample_size)
        
        # Create subplot grid
        cols = 3
        rows = (sample_size + cols - 1) // cols
        fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows))
        fig.suptitle(f'{split.capitalize()} Dataset - Random Samples with Labels', 
                    fontsize=16, fontweight='bold')
        
        # Handle single image case
        if sample_size == 1:
            axes = np.array([axes])
        axes = axes.flatten() if sample_size > 1 else axes
        
        # Plot each sampled image
        for idx, img_file in enumerate(sampled_images):
            img = cv2.imread(str(img_file))
            if img is None:
                print(f"Warning: Could not read image {img_file}")
                continue
            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w = img.shape[:2]
            
            ax = axes[idx]
            ax.imshow(img)
            
            # Find corresponding label file
            label_file = labels_path / f"{img_file.stem}.txt"
            
            # Try alternative label locations if not found
            if not label_file.exists():
                label_file = img_file.parent.parent / 'labels' / img_file.parent.name / f"{img_file.stem}.txt"
            
            if not label_file.exists():
                label_file = img_file.parent.parent / 'labels' / f"{img_file.stem}.txt"
            
            # Draw bounding boxes and labels
            num_objects = 0
            if label_file.exists():
                try:
                    with open(label_file, 'r') as f:
                        for line in f:
                            line = line.strip()
                            if not line:
                                continue
                            
                            parts = line.split()
                            if len(parts) >= 5:
                                try:
                                    class_id = int(parts[0])
                                    x_center, y_center, width, height = map(float, parts[1:5])
                                    
                                    # Convert YOLO format (normalized) to pixel coordinates
                                    x_center *= w
                                    y_center *= h
                                    width *= w
                                    height *= h
                                    
                                    x1 = x_center - width / 2
                                    y1 = y_center - height / 2
                                    
                                    # Draw bounding box
                                    rect = patches.Rectangle((x1, y1), width, height, 
                                                            linewidth=2, edgecolor='red', 
                                                            facecolor='none')
                                    ax.add_patch(rect)
                                    
                                    # Add class label with background
                                    class_name = class_names[class_id] if class_id < len(class_names) else f"Class_{class_id}"
                                    ax.text(x1, y1 - 5, class_name, color='white', 
                                           fontsize=9, fontweight='bold',
                                           bbox=dict(boxstyle='round,pad=0.3', 
                                                   facecolor='red', alpha=0.8))
                                    
                                    num_objects += 1
                                except (ValueError, IndexError) as e:
                                    print(f"Warning: Malformed label in {label_file}: {line}")
                                    continue
                except Exception as e:
                    print(f"Warning: Could not read label file {label_file}: {e}")
            
            ax.axis('off')
            ax.set_title(f"{img_file.name}\n({w}x{h}, {num_objects} objects)", 
                        fontsize=9)
        
        # Hide extra subplots
        for idx in range(sample_size, len(axes)):
            axes[idx].axis('off')
        
        plt.tight_layout()
        plt.show()
    
    print(f"\nPlotted samples from {len([s for s in splits if s in stats['splits']])} split(s)")

In [39]:
plot_yolo_samples(DATASET_PATH, stats, num_samples=6, splits=['train', 'val', 'test'])

<Figure size 1500x1000 with 6 Axes>

<Figure size 1500x1000 with 6 Axes>

<Figure size 1500x1000 with 6 Axes>


Plotted samples from 3 split(s)
