# Segment IRL Validation Pictures

In [1]:
from pathlib import Path
import shutil
from PIL import Image
import numpy as np
import cv2
from ultralytics.models import YOLO
import torch
from tqdm import tqdm

from src.detection import RoboflowDetector
from src.segmentation import FastSAMSegmenter
from src.pipeline import img_pipeline

ModelDependencyMissing: Your `inference` configuration does not support SAM3 model. Install SAM3 dependencies and set CORE_MODEL_SAM3_ENABLED to True.


In [None]:
# Ball branch of the pipeline: Roboflow produces detections, FastSAM segments them.
# Per the original note, the Roboflow model ID is hardcoded in the detector and
# the API key comes from the ROBOFLOW_API_KEY environment variable (.env file).
ball_detector = RoboflowDetector()
ball_segmenter = FastSAMSegmenter()

# Person branch: pretrained YOLO segmentation weights loaded from disk.
PERSON_MODEL_PATH = Path('models/pretrained/yolo11n-seg.pt')
person_model = YOLO(str(PERSON_MODEL_PATH))

# One-shot summary of what was loaded (one message per line).
print(
    f"âœ“ Ball detector (Roboflow): {ball_detector.DEFAULT_MODEL_ID}",
    "âœ“ Ball segmenter: FastSAM",
    f"âœ“ Person model: {PERSON_MODEL_PATH}",
    sep="\n",
)

## Step 6: Display Statistics

In [None]:
# Minimum confidence passed to the person model's predict() call
CONF_THRESHOLD = 0.5

# Per-image counters, read later by the statistics cell
stats = {"total": 0, "with_ball": 0, "with_person": 0, "empty": 0}

print(f"Combining ball + person masks...")
print(f"Confidence threshold (person): {CONF_THRESHOLD}")
print(f"Class priority: ball > person")
print()

for img_path in tqdm(img_paths, desc="Combining masks"):
    stats["total"] += 1

    # Only the dimensions are needed; the context manager releases the file
    # handle immediately instead of keeping one open per processed image.
    with Image.open(img_path) as img:
        h, w = img.height, img.width

    # Output label mask, initialised to 0 everywhere.
    combined_mask = np.zeros((h, w), dtype=np.uint8)
    # Ball pixels are tracked in their own layer. The ball class id is 0, which
    # is also the value of untouched pixels in combined_mask, so combined_mask
    # alone cannot tell "ball" from "nothing here" and persons would overwrite
    # balls.
    # NOTE(review): ball (0) and background (0) share the same label value in
    # the saved PNG — confirm this is the intended encoding for the dataset.
    ball_region = np.zeros((h, w), dtype=np.uint8)
    has_detections = False

    # --- 1. Load ball segmentation from txt (if exists) ---
    ball_txt_path = BALL_TXT_OUTPUT / (img_path.stem + '.txt')
    if ball_txt_path.exists():
        # YOLO polygon format, one polygon per line: "class_id x1 y1 x2 y2 ..."
        with open(ball_txt_path, 'r') as f:
            lines = f.readlines()

        ball_drawn = False
        for line in lines:
            parts = line.strip().split()
            # A usable line is class_id + at least 3 complete (x, y) pairs:
            # 7 or more tokens and an odd token count. Anything else is skipped
            # (an unpaired trailing coordinate used to raise IndexError).
            if len(parts) < 7 or len(parts) % 2 == 0:
                continue

            # Normalised coordinates -> integer pixel coordinates
            coords = [float(p) for p in parts[1:]]
            points = [
                [int(x * w), int(y * h)]
                for x, y in zip(coords[0::2], coords[1::2])
            ]

            # Mark the polygon as ball; these pixels stay 0 (ball class) in
            # combined_mask and are protected from the person step below.
            points_array = np.array(points, dtype=np.int32)
            cv2.fillPoly(ball_region, [points_array], 1)
            ball_drawn = True

        # Count the image only when a polygon was actually drawn, so an empty
        # or malformed txt file cannot be both "with ball" and "empty".
        if ball_drawn:
            has_detections = True
            stats["with_ball"] += 1

    # --- 2. Segment persons (class 1) ---
    person_results = person_model.predict(
        str(img_path),
        classes=[0],  # Person class in COCO
        conf=CONF_THRESHOLD,
        device=DEVICE,
        verbose=False
    )

    if person_results[0].masks is not None:
        for mask in person_results[0].masks.data:
            mask_np = (mask.cpu().numpy() > 0.5).astype(np.uint8)

            # Resize if needed (INTER_NEAREST keeps the mask strictly binary)
            if mask_np.shape != (h, w):
                mask_np = cv2.resize(mask_np, (w, h), interpolation=cv2.INTER_NEAREST)

            # Write the person class everywhere the person mask is set, except
            # on ball pixels: this is what enforces "ball > person".
            person_area = (ball_region == 0) & (mask_np == 1)
            combined_mask[person_area] = 1  # person class
            has_detections = True

        stats["with_person"] += 1

    # Track empty images
    if not has_detections:
        stats["empty"] += 1

    # Save mask (even if empty = all zeros)
    mask_img = Image.fromarray(combined_mask, mode='L')
    mask_img.save(IRL_LABELS / (img_path.stem + '.png'))

    # Copy original image next to its label
    shutil.copy(img_path, IRL_IMAGES / img_path.name)

print("\nâœ“ Processing complete!")

In [None]:
# Percentage denominator. When no image was processed every counter is 0, so
# dividing by 1 prints 0.0% instead of raising ZeroDivisionError.
denominator = stats['total'] or 1

print("=" * 60)
print("ðŸ“Š DATASET STATISTICS")
print("=" * 60)
print(f"Total images processed:    {stats['total']}")
print(f"Images with ball(s):       {stats['with_ball']} ({stats['with_ball']/denominator*100:.1f}%)")
print(f"Images with person(s):     {stats['with_person']} ({stats['with_person']/denominator*100:.1f}%)")
print(f"Images with no detections: {stats['empty']} ({stats['empty']/denominator*100:.1f}%)")
print("=" * 60)

# Verify dataset consistency: every copied image should have one PNG label.
# Note that num_images counts every entry in IRL_IMAGES, whatever its extension.
num_images = len(list(IRL_IMAGES.glob("*")))
num_labels = len(list(IRL_LABELS.glob("*.png")))

print(f"\nâœ“ Dataset consistency check:")
print(f"  Images: {num_images}")
print(f"  Labels: {num_labels}")
print(f"  Match: {'âœ“ YES' if num_images == num_labels else 'âœ— NO'}")

print(f"\nâœ“ Dataset ready at: {IRL_READY}")
print(f"  - images/  ({num_images} files)")
print(f"  - labels/  ({num_labels} .png masks)")

## Step 5: Segment Persons & Combine Masks

In [None]:
# Collect every JPEG in the raw folder exactly once.
# Globbing "*.jpg" and "*.JPG" separately returns the same file twice where
# pathlib pattern matching is case-insensitive (Windows), which doubles the
# work and inflates the statistics. A single listing filtered on the
# lower-cased suffix avoids that; sorting makes the processing order stable.
img_paths = sorted(
    candidate
    for candidate in IRL_RAW.glob("*")
    if candidate.suffix.lower() in {".jpg", ".jpeg"}
)

print(f"Processing {len(img_paths)} images for ball segmentation...")
print(f"Pipeline: Roboflow detection â†’ FastSAM segmentation â†’ YOLO txt")
print()

# Run the ball detection + segmentation pipeline on each image. The three
# output directories receive the detection, segmentation and txt results.
for img_path in tqdm(img_paths, desc="Ball segmentation"):
    img_pipeline(
        img_path,
        detect_fn=ball_detector.detect,
        segment_fn=ball_segmenter.segment_bbox,
        det_output_dir=BALL_DET_OUTPUT,
        seg_output_dir=BALL_SEG_OUTPUT,
        txt_output_dir=BALL_TXT_OUTPUT,
        mode="bbox"  # Use bbox mode for FastSAM
    )

print("âœ“ Ball segmentation complete!")

## Step 4: Segment Balls (Roboflow + FastSAM)

## Step 3: Load Models & Configure Roboflow

In [None]:
# Where the raw validation photos are read from
IRL_RAW = Path("datasets/raw/IRL_validation_pictures")

# Final dataset root, with one sub-folder for images and one for label masks
IRL_READY = Path("datasets/ready/IRL_dataset")
IRL_IMAGES = IRL_READY.joinpath("images")
IRL_LABELS = IRL_READY.joinpath("labels")

# Intermediate artefacts of the ball detection + segmentation stage
BALL_DET_OUTPUT = Path("detection_output_folder/irl_balls")
BALL_SEG_OUTPUT = Path("seg_output_folder/irl_balls")
BALL_TXT_OUTPUT = Path("txt_output_folder/irl_balls")

# Make sure every output folder exists (parents included, no error if present)
for out_dir in (IRL_IMAGES, IRL_LABELS, BALL_DET_OUTPUT, BALL_SEG_OUTPUT, BALL_TXT_OUTPUT):
    out_dir.mkdir(parents=True, exist_ok=True)

# Run on the first CUDA device when one is available, otherwise on CPU
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

# Count the input images using the same four case-variant patterns, summed
image_count = sum(
    len(list(IRL_RAW.glob(pattern)))
    for pattern in ("*.jpg", "*.jpeg", "*.JPG", "*.JPEG")
)

print(f"Device: {DEVICE}")
print(f"Input: {IRL_RAW}")
print(f"Output: {IRL_READY}")
print(f"Found {image_count} images")

## Step 2: Configure Paths & Create Directories

## Step 1: Setup & Imports