## Step 1: Setup & Imports

In [2]:
from pathlib import Path
import random
from ultralytics.models import YOLO
import torch
import yaml

# Seed every RNG this notebook touches directly so that a fresh
# "Restart Kernel -> Run All" starts from the same random state.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)  # seeds torch's default generator (CPU and CUDA)
# NOTE(review): the Ultralytics trainer accepts its own `seed` argument;
# confirm whether it should be set to SEED as well.

In [3]:
# DisplayPath comes from the project-local `utils` module. Later cells call
# `.display()` on these objects in addition to ordinary path operations
# (`/`, `.exists()`, `.glob()`, `.absolute()`).
from utils import DisplayPath
# Rebind `Path` so every path built below is a DisplayPath.
# NOTE: this deliberately shadows the `pathlib.Path` imported in the first cell.
Path = DisplayPath

## Step 2: Configure Paths & Hyperparameters

In [4]:
# Locations shared by the rest of the notebook.
# The dataset directory is produced by e2e_data_prep.ipynb.
YOLO_DATASET = Path("datasets/ready/full_dataset")
RUNS_DIR = Path("runs/segment")

# Fail fast when the prepared dataset is absent.
if not YOLO_DATASET.exists():
    raise FileNotFoundError(f"Dataset not found at {YOLO_DATASET}. Run e2e_data_prep.ipynb first!")

# Show the dataset root, then one heading + link per split directory.
print("Dataset:")
YOLO_DATASET.display()
for heading, split_name in (("  Train:", 'train'), ("  Val:", 'val'), ("  Test:", 'test')):
    print(heading)
    (YOLO_DATASET / split_name).display()

Dataset:


[datasets/ready/full_dataset](datasets/ready/full_dataset)

  Train:


[datasets/ready/full_dataset/train](datasets/ready/full_dataset/train)

  Val:


[datasets/ready/full_dataset/val](datasets/ready/full_dataset/val)

  Test:


[datasets/ready/full_dataset/test](datasets/ready/full_dataset/test)

In [5]:
# --- Training hyperparameters ---------------------------------------------
# Upper bound on the number of training epochs. The training run recorded in
# this notebook's output used epochs=500 (with early stopping, patience=20),
# so the constant is kept in sync with that value instead of a stale 50.
EPOCHS = 500
BATCH_SIZE = 16
IMG_SIZE = 640
model_type = "yolo11n-seg.pt"  # checkpoint name handed to YOLO(...)

# Prefer the first CUDA device when one is available, otherwise use the CPU.
cuda_available = torch.cuda.is_available()
DEVICE = 'cuda:0' if cuda_available else 'cpu'

print(f"Device: {DEVICE}")
if cuda_available:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA: {torch.version.cuda}")

Device: cuda:0
GPU: NVIDIA GeForce RTX 3080 Laptop GPU
CUDA: 12.8


In [5]:
# Augmentation settings, unpacked as keyword arguments into model.train().
AUG_CONFIG = dict(
    # Colour jitter (HSV space)
    hsv_h=0.015,      # hue
    hsv_s=0.7,        # saturation
    hsv_v=0.4,        # value
    # Geometric transforms
    degrees=10.0,     # rotation
    translate=0.1,    # translation
    scale=0.5,        # scaling
    shear=0.0,        # shearing
    perspective=0.0,  # perspective
    # Flips
    flipud=0.0,       # vertical flip
    fliplr=0.5,       # horizontal flip
    # Multi-image augmentations
    mosaic=1.0,       # mosaic
    mixup=0.0,        # mixup
)

## Step 3: Verify Dataset Structure

The dataset has already been prepared by `e2e_data_prep.ipynb`; this step only checks that every split contains images and labels.

In [6]:
# Verify dataset structure: count image files and label files in every split
# and stop early when the splits needed for training are unusable.
print("="*60)
print("DATASET VERIFICATION")
print("="*60)

# Only files with an image extension are counted, so stray entries in an
# images directory (hidden files, sub-directories, caches) do not inflate the
# totals. NOTE(review): this list is meant to mirror the image formats
# Ultralytics supports - extend it if the dataset uses another extension.
IMAGE_SUFFIXES = {
    '.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png',
    '.tif', '.tiff', '.webp', '.pfm', '.heic',
}
# The generated data.yaml references train/images and val/images, so these
# two splits must contain images; the test split is optional.
REQUIRED_SPLITS = ('train', 'val')

splits = ['train', 'val', 'test']
stats = {}

for split in splits:
    img_dir = YOLO_DATASET / split / "images"
    lbl_dir = YOLO_DATASET / split / "labels"

    if img_dir.exists() and lbl_dir.exists():
        num_images = sum(
            1 for p in img_dir.glob("*")
            if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
        )
        num_labels = sum(1 for _ in lbl_dir.glob("*.txt"))
        stats[split] = {'images': num_images, 'labels': num_labels}
        print(f"{split.upper():5s}: {num_images:4d} images, {num_labels:4d} labels")
        # Images without a label file can be legitimate (background images),
        # so a mismatch is reported but does not abort the notebook.
        if num_images != num_labels:
            print(f"       WARNING: {num_images} images vs {num_labels} label files in '{split}'")
    else:
        stats[split] = {'images': 0, 'labels': 0}
        print(f"{split.upper():5s}: Missing!")

total_images = sum(s['images'] for s in stats.values())
total_labels = sum(s['labels'] for s in stats.values())

print(f"{'TOTAL':5s}: {total_images:4d} images, {total_labels:4d} labels")
print("="*60)

if total_images == 0:
    raise RuntimeError("No dataset found! Run e2e_data_prep.ipynb to create the dataset.")

# A non-empty total is not enough: training needs both train and val images.
empty_required = [name for name in REQUIRED_SPLITS if stats[name]['images'] == 0]
if empty_required:
    raise RuntimeError(
        f"No images found in required split(s): {', '.join(empty_required)}. "
        "Run e2e_data_prep.ipynb to create the dataset."
    )

DATASET VERIFICATION
TRAIN:  998 images,  998 labels
VAL  :   47 images,   47 labels
TEST :  209 images,  209 labels
TOTAL: 1254 images, 1254 labels


## Step 4: Create YOLO Configuration File

In [7]:
# Class name -> class index. The indices must be exactly 0..N-1 because the
# `names` list written to data.yaml is positional.
classes = {
    'red ball': 0,
    'human': 1,
    'trashcan': 2
}

if sorted(classes.values()) != list(range(len(classes))):
    raise ValueError(f"Class indices must be 0..{len(classes) - 1} without gaps or duplicates: {classes}")

# Order the names by their declared index rather than by dict insertion
# order, so reordering the dict above can never silently swap class ids.
class_names = [name for name, _ in sorted(classes.items(), key=lambda item: item[1])]

config = {
    'path': str(YOLO_DATASET.absolute()),
    'train': 'train/images',
    'val': 'val/images',
    'nc': len(classes),
    'names': class_names
}

# Register the held-out test split when it exists, so it can be evaluated
# with model.val(split='test') in addition to the default val split.
if (YOLO_DATASET / 'test' / 'images').exists():
    config['test'] = 'test/images'

config_path = YOLO_DATASET / 'data.yaml'
# Explicit encoding keeps the file identical across platforms.
with open(config_path, 'w', encoding='utf-8') as f:
    yaml.dump(config, f, default_flow_style=False)

print(f"✓ Configuration saved: {config_path}")
print("Dataset structure:")
YOLO_DATASET.display()
for heading, split_name in (("  Train:", 'train'), ("  Val:", 'val'), ("  Test:", 'test')):
    print(heading)
    (YOLO_DATASET / split_name).display()

‚úì Configuration saved: datasets/ready/full_dataset/data.yaml
Dataset structure:


[datasets/ready/full_dataset](datasets/ready/full_dataset)

  Train:


[datasets/ready/full_dataset/train](datasets/ready/full_dataset/train)

  Val:


[datasets/ready/full_dataset/val](datasets/ready/full_dataset/val)

  Test:


[datasets/ready/full_dataset/test](datasets/ready/full_dataset/test)

## Step 5: Train Model

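Fine-tune the pretrained YOLO11 segmentation model (`yolo11n-seg.pt`) with:
- All layers before the detection/segmentation head frozen
- Data augmentation applied to the training set only
- A checkpoint saved every epoch, plus the best model (`best.pt`) and the latest one (`last.pt`)
- Validation after each epoch
- Early stopping after 20 epochs without improvement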

In [8]:
# Load pretrained model
# `model_type` is the checkpoint name chosen in the hyperparameter cell.
model = YOLO(model_type)

In [8]:
# Run name: passed as `name=` to model.train() and joined onto RUNS_DIR
# later in the notebook to locate the run directory and its weights.
project_name = 'ball_person_trashcan_model'

In [10]:

# Fine-tune only the prediction head: find the index of the first module whose
# class name contains 'Detect' or 'Segment' and freeze every layer before it.
# If no such module is found, fall back to the index of the last layer.
layers = model.model.model
head_idx = next(
    (
        idx for idx, layer in enumerate(layers)
        if 'Detect' in layer.__class__.__name__ or 'Segment' in layer.__class__.__name__
    ),
    len(layers) - 1,
)

results = model.train(
    data=str(config_path),
    # Use the constant from the hyperparameter cell instead of a hard-coded
    # literal, so the configured value and the value used cannot drift apart.
    epochs=EPOCHS,
    freeze=list(range(head_idx)),
    batch=BATCH_SIZE,
    imgsz=IMG_SIZE,
    device=DEVICE,
    project=str(RUNS_DIR),
    name=project_name,
    exist_ok=True,  # reuse the same run directory on re-runs

    # Checkpointing
    save=True,
    save_period=1,  # Save every epoch (one checkpoint file per epoch on disk)

    # Validation
    val=True,

    # Data augmentation (only applied to train)
    **AUG_CONFIG,

    # Optimizer
    optimizer='Adam',
    lr0=0.001,
    lrf=0.01,
    momentum=0.937,  # NOTE(review): presumably used as Adam's beta1 - confirm
    weight_decay=0.0005,

    # Loss weights
    box=7.5,
    cls=0.5,
    dfl=1.5,

    # Other
    patience=20,  # Early stopping
    workers=8,

    verbose=True)

New https://pypi.org/project/ultralytics/8.3.246 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.235 üöÄ Python-3.12.10 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 3080 Laptop GPU, 8192MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=datasets/ready/full_dataset/data.yaml, degrees=10.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=500, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300,

In [9]:
# Locate the best checkpoint of the training run from the shared settings
# (RUNS_DIR and project_name) instead of repeating the path as a literal,
# so renaming the run or the output directory cannot leave this cell stale.
best_model = RUNS_DIR / project_name / 'weights' / 'best.pt'
if not best_model.exists():
    raise FileNotFoundError(f"Checkpoint not found at {best_model}. Run the training cell first!")
best_model.display()

# Replace the in-memory model with the best checkpoint for evaluation.
model = YOLO(best_model)

[runs/segment/ball_person_trashcan_model/weights/best.pt](runs/segment/ball_person_trashcan_model/weights/best.pt)

## Step 6: Evaluate Results

In [10]:
# Evaluate the loaded model and report box and mask mAP.
metrics = model.val()

rule = "="*60
report_lines = [
    "\n" + rule,
    "VALIDATION METRICS",
    rule,
    f"Box mAP50: {metrics.box.map50:.4f}",
    f"Box mAP50-95: {metrics.box.map:.4f}",
    f"Mask mAP50: {metrics.seg.map50:.4f}",
    f"Mask mAP50-95: {metrics.seg.map:.4f}",
]
for report_line in report_lines:
    print(report_line)

Ultralytics 8.3.235 üöÄ Python-3.12.10 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 3080 Laptop GPU, 8192MiB)
YOLO11n-seg summary (fused): 113 layers, 2,835,153 parameters, 0 gradients, 9.6 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 1156.9¬±392.0 MB/s, size: 2696.9 KB)
[K[34m[1mval: [0mScanning /home/tonino/projects/ball segmentation/datasets/ready/full_dataset/val/labels.cache... 47 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 47/47 35.8Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3/3 6.2s/it 18.6s5.0s4
                   all         47         81      0.573      0.461      0.522      0.486      0.573      0.461      0.522      0.425
              red ball         34         34      0.987      0.382      0.594      0.526      0.987      0.382      0.594      0.478
             

In [15]:

# Resolve the run directory and the two checkpoints stored under it.
model_dir = RUNS_DIR / project_name
weights_dir = model_dir / 'weights'
best_model = weights_dir / 'best.pt'
last_model = weights_dir / 'last.pt'

# Print a caption followed by a clickable link for each location.
for caption, location in (
    ("Best model: ", best_model),
    ("   Last model: ", last_model),
    ("   Results: ", model_dir),
):
    print(caption)
    location.display()

Best model: 


[runs/segment/ball_person_trashcan_model/weights/best.pt](runs/segment/ball_person_trashcan_model/weights/best.pt)

   Last model: 


[runs/segment/ball_person_trashcan_model/weights/last.pt](runs/segment/ball_person_trashcan_model/weights/last.pt)

   Results: 


[runs/segment/ball_person_trashcan_model](runs/segment/ball_person_trashcan_model)

## Step 7: Test on Sample Images (Optional)

In [16]:
# Run inference on the validation images with the currently loaded model and
# save the annotated predictions.
# Sorted for a stable processing order; non-file entries are skipped.
test_images = sorted(
    p for p in (YOLO_DATASET / "val" / "images").glob("*") if p.is_file()
)

print(f"Testing on {len(test_images)} sample images...")

# Directory the predictor actually wrote to. It is taken from the returned
# results because predict() picks its own output folder (the logged output
# shows e.g. runs/segment/predict8), not a folder under the training run.
save_dir = None
for img_path in test_images:
    results = model.predict(str(img_path), save=True, conf=0.25)
    if results:
        save_dir = results[0].save_dir
    print(f"  ✓ {img_path.name}")

if save_dir is None:
    print("\nNo predictions were saved.")
else:
    print(f"\nResults saved to: {save_dir}")

Testing on 47 sample images...

image 1/1 /home/tonino/projects/ball segmentation/datasets/ready/full_dataset/val/images/366acb21b00b40588372736b95776fac.jpg: 640x480 1 red ball, 1 human, 70.3ms
Speed: 3.3ms preprocess, 70.3ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1m/home/tonino/projects/ball segmentation/runs/segment/predict8[0m
  ‚úì 366acb21b00b40588372736b95776fac.jpg

image 1/1 /home/tonino/projects/ball segmentation/datasets/ready/full_dataset/val/images/0d4db7c113776bc0a401d833d556df84.jpg: 640x480 1 red ball, 1 human, 12.8ms
Speed: 2.3ms preprocess, 12.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1m/home/tonino/projects/ball segmentation/runs/segment/predict8[0m
  ‚úì 0d4db7c113776bc0a401d833d556df84.jpg

image 1/1 /home/tonino/projects/ball segmentation/datasets/ready/full_dataset/val/images/a6b631525ff8dc9bc26bb53e97481606.jpg: 640x480 1 human, 27.9ms
Speed: 2.1ms preprocess, 27.9ms inf