# 01 - Training Collar Detector

Fine-tuning di YOLOv8n per la detection di collare/guinzaglio sui cani.

## Dataset
- **Labeling Platform Merged** (default): ~7,500+ immagini dalla piattaforma di labeling
  - Dog-with-Leash (classe 0) - con collare/guinzaglio
  - Dog-without-Leash (classe 1) - senza collare/guinzaglio
- **Dog with Leash** (Roboflow, fallback): ~152 immagini

## Output
- `P(no_collar)` ∈ [0, 1] - probabilità che il cane NON abbia collare

## Usage
1. Esegui `merge_exports.py` per preparare il dataset dalla piattaforma di labeling
2. Esegui questo notebook per il training

In [None]:
# Install the notebook's dependencies (-q keeps pip output quiet)
%pip install ultralytics torch torchvision albumentations matplotlib seaborn pandas scikit-learn -q

In [None]:
import os
import sys
from pathlib import Path
import yaml
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import random

from ultralytics import YOLO
from sklearn.model_selection import train_test_split

print(f"Python: {sys.version}")

In [None]:
# Path configuration - kept RELATIVE for portability.
# Dataset priority: merged export from the labeling platform first,
# Roboflow "Dog with Leash" as fallback.
import sys
sys.path.insert(0, str(Path.cwd()))

try:
    from notebook_utils import get_paths, get_device, print_paths
    paths = get_paths()
    print_paths(paths)
except ImportError:
    print("notebook_utils.py non trovato, usando fallback...")
    # Manual fallback: infer the project root from the working directory.
    NOTEBOOK_DIR = Path.cwd()
    if NOTEBOOK_DIR.name == "notebooks":
        PROJECT_DIR = NOTEBOOK_DIR.parent.parent
    elif NOTEBOOK_DIR.name == "training":
        PROJECT_DIR = NOTEBOOK_DIR.parent
    else:
        # Use the closest directory named "ResQPet" (cwd included). When no
        # such ancestor exists, stay in the working directory rather than
        # ending up at the filesystem root.
        PROJECT_DIR = next(
            (d for d in (NOTEBOOK_DIR, *NOTEBOOK_DIR.parents) if d.name == "ResQPet"),
            NOTEBOOK_DIR,
        )
    BASE_DIR = PROJECT_DIR.parent
    paths = {
        'project_dir': PROJECT_DIR,
        'base_dir': BASE_DIR,
        'weights_dir': PROJECT_DIR / "weights",
        'collar_dataset': BASE_DIR / "Dog with Leash",
        'runs_dir': PROJECT_DIR / "training" / "runs",
        'notebooks_dir': PROJECT_DIR / "training" / "notebooks",
    }
    paths['weights_dir'].mkdir(parents=True, exist_ok=True)

# ============================================================================
# DATASET SELECTION
# ============================================================================
# Priority 1: merged dataset from the labeling platform (7,500+ images)
# Priority 2: Roboflow "Dog with Leash" dataset (152 images)

MERGED_DATASET_DIR = paths['project_dir'] / "labeling_data" / "exports" / "collar_yolo"
ROBOFLOW_DATASET_DIR = paths['collar_dataset']

# The merged dataset only counts as present when its data.yaml exists too
if MERGED_DATASET_DIR.exists() and (MERGED_DATASET_DIR / "data.yaml").exists():
    DATASET_DIR = MERGED_DATASET_DIR
    USE_MERGED = True
    print("‚úì Usando dataset MERGED dalla piattaforma di labeling")
elif ROBOFLOW_DATASET_DIR.exists():
    DATASET_DIR = ROBOFLOW_DATASET_DIR
    USE_MERGED = False
    print("! Dataset merged non trovato, usando Roboflow 'Dog with Leash'")
else:
    raise FileNotFoundError(
        f"Nessun dataset trovato!\n"
        f"  - Merged: {MERGED_DATASET_DIR}\n"
        f"  - Roboflow: {ROBOFLOW_DATASET_DIR}\n\n"
        f"Esegui prima: python -m labeling_tool.scripts.merge_exports"
    )

# Names kept for backward compatibility with the cells below
BASE_DIR = paths['base_dir']
OUTPUT_DIR = paths['weights_dir']
# The Roboflow export is re-split into a sub-folder; the merged one is used as is
SPLIT_DIR = DATASET_DIR / "split_dataset" if not USE_MERGED else DATASET_DIR

print(f"\nDataset: {DATASET_DIR}")
print(f"Tipo: {'Merged (7,500+ img)' if USE_MERGED else 'Roboflow (152 img)'}")
print(f"Output weights: {OUTPUT_DIR}")
print(f"Dataset exists: {DATASET_DIR.exists()}")

## 1. Esplorazione Dataset Originale

In [None]:
# Print a two-level overview of the dataset directory
print(f"Struttura dataset ({DATASET_DIR.name}):")
for entry in sorted(DATASET_DIR.iterdir()):
    if not entry.is_dir():
        print(f"  {entry.name}")
        continue

    children = list(entry.iterdir())
    print(f"  {entry.name}/")
    # Only the first five children are listed to keep the output short
    for child in children[:5]:
        if child.is_dir():
            n_files = sum(1 for _ in child.glob('*.*'))
            print(f"      {child.name}/ ({n_files} files)")
        else:
            print(f"      {child.name}")
    if len(children) > 5:
        print(f"      ... e altri {len(children) - 5} elementi")

In [None]:
# Load the dataset's data.yaml, or fall back to the two known classes
data_yaml = DATASET_DIR / "data.yaml"

if not data_yaml.exists():
    print(f"File {data_yaml} non trovato!")
    original_config = {'names': {0: 'Dog-with-Leash', 1: 'Dog-without-Leash'}, 'nc': 2}
else:
    with open(data_yaml, 'r') as f:
        original_config = yaml.safe_load(f)

    print("Configurazione dataset:")
    for key, value in original_config.items():
        if key in ['roboflow']:  # verbose metadata, not printed
            continue
        print(f"  {key}: {value}")

In [None]:
# Count the available images and how many of them have a label file
if not USE_MERGED:
    # Roboflow dataset: a single train folder
    train_images_dir = DATASET_DIR / 'train' / 'images'
    train_labels_dir = DATASET_DIR / 'train' / 'labels'

    all_images = [img for pattern in ('*.jpg', '*.png') for img in train_images_dir.glob(pattern)]
    print(f"Immagini trovate: {len(all_images)}")

    # An image counts as labelled when "<stem>.txt" exists in the labels folder
    images_with_labels = len([img for img in all_images if (train_labels_dir / f"{img.stem}.txt").exists()])
    print(f"Immagini con labels: {images_with_labels}")
else:
    # Merged dataset: images are already divided into train/val
    train_images_dir = DATASET_DIR / 'images' / 'train'
    train_labels_dir = DATASET_DIR / 'labels' / 'train'
    val_images_dir = DATASET_DIR / 'images' / 'val'
    val_labels_dir = DATASET_DIR / 'labels' / 'val'

    train_images = [img for pattern in ('*.jpg', '*.png') for img in train_images_dir.glob(pattern)]
    val_images = [img for pattern in ('*.jpg', '*.png') for img in val_images_dir.glob(pattern)]
    all_images = train_images + val_images

    print("Immagini trovate:")
    print(f"  - Train: {len(train_images)}")
    print(f"  - Val: {len(val_images)}")
    print(f"  - Totale: {len(all_images)}")

    # An image counts as labelled when "<stem>.txt" exists in the labels folder
    train_with_labels = len([img for img in train_images if (train_labels_dir / f"{img.stem}.txt").exists()])
    val_with_labels = len([img for img in val_images if (val_labels_dir / f"{img.stem}.txt").exists()])
    print("\nImmagini con labels:")
    print(f"  - Train: {train_with_labels}")
    print(f"  - Val: {val_with_labels}")

In [None]:
# Count annotations per class over every label file of the active dataset
class_counts = {0: 0, 1: 0}  # 0 = with leash, 1 = without leash

if USE_MERGED:
    # Merged dataset: look at both train and val
    label_dirs = [
        DATASET_DIR / 'labels' / 'train',
        DATASET_DIR / 'labels' / 'val'
    ]
else:
    label_dirs = [DATASET_DIR / 'train' / 'labels']

for label_dir in label_dirs:
    if not label_dir.exists():
        continue
    for label_file in label_dir.glob('*.txt'):
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if parts:
                    # The first token of a label line is the class id;
                    # ids other than 0/1 are ignored
                    cls = int(parts[0])
                    if cls in class_counts:
                        class_counts[cls] += 1

print("\nDistribuzione classi:")
class_names = original_config.get('names', {0: 'Dog-with-Leash', 1: 'Dog-without-Leash'})
if isinstance(class_names, list):
    class_names = dict(enumerate(class_names))

# Resolve the display names once so the printout and the chart agree, and a
# data.yaml without an entry for class 0 or 1 cannot raise KeyError in the plot
display_names = {cls: class_names.get(cls, f"Class {cls}") for cls in class_counts}

for cls, count in class_counts.items():
    print(f"  {display_names[cls]}: {count} annotazioni")

# Bar chart of the two counts
plt.figure(figsize=(8, 5))
plt.bar([display_names[0], display_names[1]], [class_counts[0], class_counts[1]],
        color=['green', 'red'])
plt.title('Distribuzione Classi nel Dataset')
plt.ylabel('Numero annotazioni')
plt.tight_layout()
plt.show()

In [None]:
# Show a 2x3 grid of training images with their YOLO boxes drawn on top
import cv2

# Pick the folders to sample from
if USE_MERGED:
    sample_dir = DATASET_DIR / 'images' / 'train'
    labels_dir = DATASET_DIR / 'labels' / 'train'
else:
    sample_dir = DATASET_DIR / 'train' / 'images'
    labels_dir = DATASET_DIR / 'train' / 'labels'

# Up to six images: JPGs first, topped up with PNGs if needed
sample_images = sorted(sample_dir.glob('*.jpg'))[:6]
if len(sample_images) < 6:
    sample_images += sorted(sample_dir.glob('*.png'))[:6 - len(sample_images)]

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

for i, img_path in enumerate(sample_images):
    img = cv2.imread(str(img_path))
    if img is None:
        # Unreadable file: blank the cell instead of leaving an empty frame
        axes[i].axis('off')
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_h, img_w = img.shape[:2]

    label_path = labels_dir / f"{img_path.stem}.txt"

    axes[i].imshow(img)
    axes[i].set_title(img_path.name, fontsize=10)
    axes[i].axis('off')

    # Draw the bounding boxes
    if label_path.exists():
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 5:
                    cls = int(parts[0])
                    cx, cy, w, h = float(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])

                    # Normalised centre/size -> pixel top-left corner and size
                    x1 = int((cx - w/2) * img_w)
                    y1 = int((cy - h/2) * img_h)
                    box_w = int(w * img_w)
                    box_h = int(h * img_h)

                    color = 'green' if cls == 0 else 'red'  # 0 = with leash, 1 = without
                    label = 'With Collar' if cls == 0 else 'Without Collar'
                    rect = plt.Rectangle((x1, y1), box_w, box_h,
                                         fill=False, edgecolor=color, linewidth=2)
                    axes[i].add_patch(rect)
                    axes[i].text(x1, y1-5, label, color=color, fontsize=8,
                                fontweight='bold', backgroundcolor='white')

# Hide the unused axes
for j in range(len(sample_images), 6):
    axes[j].axis('off')

dataset_name = "Merged Dataset" if USE_MERGED else "Roboflow Dataset"
plt.suptitle(f'Esempi dal {dataset_name}', fontsize=14)
plt.tight_layout()

# Create the target folder first, as the other figure-saving cells do;
# savefig fails if the directory is missing
samples_png = paths['notebooks_dir'] / 'collar_dataset_samples.png'
samples_png.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(samples_png, dpi=150)
plt.show()

## 2. Creazione Split Train/Val/Test

**Nota**: Se stai usando il dataset merged dalla piattaforma di labeling, lo split è già stato fatto dal merge script (80% train, 20% val).

Se invece stai usando il dataset Roboflow (che ha solo `train`), creiamo uno split:
- Train: 70%
- Validation: 20%
- Test: 10%

In [None]:
if USE_MERGED:
    # The merged dataset already has a train/val split - nothing to create
    print("‚úì Dataset merged gi√† contiene split train/val")
    print(f"  - Train: {len(train_images)} immagini")
    print(f"  - Val: {len(val_images)} immagini")
    
    # Stats kept for compatibility with later cells
    stats = {
        'train': len(train_images),
        'valid': len(val_images),
        'test': 0  # The merged dataset has no test set
    }
    
    # The config path is simply the merged dataset's data.yaml
    config_path = DATASET_DIR / 'data.yaml'
    
else:
    # Roboflow dataset: build the split manually
    def create_dataset_split(images, labels_dir, output_dir, train_ratio=0.7, val_ratio=0.2, seed=42):
        """Split images into train/valid/test subsets and copy them to disk.

        Args:
            images: iterable of image paths (pathlib.Path objects).
            labels_dir: directory holding one "<image stem>.txt" label per image.
            output_dir: root under which "<split>/images" and "<split>/labels"
                are created.
            train_ratio: fraction of the images assigned to 'train'.
            val_ratio: fraction assigned to 'valid'; the remainder is 'test'.
            seed: seed for the shuffle.

        Returns:
            dict mapping 'train', 'valid' and 'test' to the list of source
            image paths in that split.
        """
        random.seed(seed)
        np.random.seed(seed)

        # Sort before shuffling: directory listing order depends on the
        # filesystem, so the seed alone would not reproduce the same split.
        images = sorted(images)
        random.shuffle(images)

        n = len(images)
        # Round away binary-float noise before truncating: 0.7 + 0.2 is
        # 0.8999..., which would otherwise turn 10 * 0.9 into 8 instead of 9.
        train_end = int(round(n * train_ratio, 6))
        val_end = int(round(n * (train_ratio + val_ratio), 6))

        splits = {
            'train': images[:train_end],
            'valid': images[train_end:val_end],
            'test': images[val_end:]
        }

        def share(subset):
            # Percentage of the whole dataset; 0.0 for an empty dataset
            return len(subset) / n * 100 if n else 0.0

        print(f"Split dataset:")
        print(f"  Train: {len(splits['train'])} images ({share(splits['train']):.1f}%)")
        print(f"  Valid: {len(splits['valid'])} images ({share(splits['valid']):.1f}%)")
        print(f"  Test:  {len(splits['test'])} images ({share(splits['test']):.1f}%)")

        for split_name, split_images in splits.items():
            img_dir = output_dir / split_name / 'images'
            lbl_dir = output_dir / split_name / 'labels'
            img_dir.mkdir(parents=True, exist_ok=True)
            lbl_dir.mkdir(parents=True, exist_ok=True)

            for img_path in split_images:
                shutil.copy(img_path, img_dir / img_path.name)
                # Images without a label file are copied on their own
                label_path = labels_dir / f"{img_path.stem}.txt"
                if label_path.exists():
                    shutil.copy(label_path, lbl_dir / label_path.name)

        return splits

    # Start clean: drop any split left over from a previous run
    if SPLIT_DIR.exists():
        print(f"Rimuovo split precedente: {SPLIT_DIR}")
        shutil.rmtree(SPLIT_DIR)

    print(f"\nCreazione split in: {SPLIT_DIR}")
    splits = create_dataset_split(images=all_images, labels_dir=train_labels_dir, output_dir=SPLIT_DIR)

    # Per-split image counts, reused by later cells
    stats = {split_name: len(splits[split_name]) for split_name in ('train', 'valid', 'test')}

    print("\n‚úì Split completato!")

In [None]:
# Sanity check: count image and label files in each split
print("Verifica dataset:")

if not USE_MERGED:
    # Split created by this notebook: <split>/images and <split>/labels
    for split in ('train', 'valid', 'test'):
        split_root = SPLIT_DIR / split
        n_images = len(list((split_root / 'images').glob('*.*')))
        n_labels = len(list((split_root / 'labels').glob('*.txt')))

        # Refresh the stats with what is actually on disk
        stats[split] = n_images
        print(f"  {split}: {n_images} images, {n_labels} labels")

    print(f"\nTotale: {sum(stats.values())} images")
else:
    # Merged layout: images/<split> and labels/<split>
    for split in ('train', 'val'):
        n_images = len(list((DATASET_DIR / 'images' / split).glob('*.*')))
        n_labels = len(list((DATASET_DIR / 'labels' / split).glob('*.txt')))

        print(f"  {split}: {n_images} images, {n_labels} labels")

    print(f"\nTotale: {stats['train'] + stats['valid']} images")

## 3. Configurazione YOLOv8

In [None]:
# YAML configuration used for training
if not USE_MERGED:
    # Roboflow: write a data.yaml describing the split created above
    config_path = SPLIT_DIR / 'data.yaml'
    training_config = dict(
        path=str(SPLIT_DIR),
        train='train/images',
        val='valid/images',
        test='test/images',
        names={0: 'Dog-with-Leash', 1: 'Dog-without-Leash'},
        nc=2,
    )
    with open(config_path, 'w') as f:
        yaml.dump(training_config, f, default_flow_style=False)

    print(f"Config salvata in: {config_path}")
else:
    # Merged: reuse the data.yaml that ships with the dataset
    config_path = DATASET_DIR / 'data.yaml'
    print(f"Usando config esistente: {config_path}")

    with open(config_path, 'r') as f:
        training_config = yaml.safe_load(f)

# Echo the file that training will read
print("\nContenuto config:")
with open(config_path, 'r') as f:
    config_text = f.read()
print(config_text)

## 4. Training YOLOv8n

In [None]:
# Load the pre-trained YOLOv8 nano checkpoint (the small variant, chosen for speed)
print("Caricamento YOLOv8n pre-trained...")
model = YOLO(model='yolov8n.pt')
print("Modello caricato!")

In [None]:
# Training configuration
# Parameters are adapted to the dataset in use and to the available hardware

# Make sure stats exists (e.g. when the split cell above was not run)
if 'stats' not in dir():
    if USE_MERGED:
        train_count = len(list((DATASET_DIR / 'images' / 'train').glob('*.*')))
        val_count = len(list((DATASET_DIR / 'images' / 'val').glob('*.*')))
        stats = {'train': train_count, 'valid': val_count, 'test': 0}
    else:
        stats = {'train': 0, 'valid': 0, 'test': 0}

# ============================================================================
# HARDWARE CONFIGURATION
# ============================================================================
# Detect the available GPUs automatically
import torch

if torch.cuda.is_available():
    NUM_GPUS = torch.cuda.device_count()
    GPU_NAMES = [torch.cuda.get_device_name(i) for i in range(NUM_GPUS)]
    # NOTE(review): TOTAL_VRAM is computed but not used anywhere in this cell
    TOTAL_VRAM = sum(torch.cuda.get_device_properties(i).total_memory for i in range(NUM_GPUS)) / 1e9
    
    print(f"üñ•Ô∏è  GPU rilevate: {NUM_GPUS}")
    for i, name in enumerate(GPU_NAMES):
        vram = torch.cuda.get_device_properties(i).total_memory / 1e9
        print(f"   [{i}] {name} ({vram:.0f}GB)")
    
    # Device setup for multi-GPU
    if NUM_GPUS >= 2:
        DEVICE = list(range(NUM_GPUS))  # [0, 1] for 2 GPUs
        BATCH_SIZE = 128  # 64 per GPU with 2x 5090
        WORKERS = 8
        print(f"\n‚úì Multi-GPU attivo: {DEVICE}")
    else:
        DEVICE = 0
        BATCH_SIZE = 64
        WORKERS = 4
        
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    DEVICE = 'mps'
    BATCH_SIZE = 16
    WORKERS = 4
    NUM_GPUS = 1
    print("üçé Apple Silicon MPS")
else:
    DEVICE = 'cpu'
    BATCH_SIZE = 8
    WORKERS = 2
    NUM_GPUS = 0
    print("‚ö†Ô∏è  Nessuna GPU, usando CPU")

# ============================================================================
# TRAINING PARAMETERS
# ============================================================================
# The Roboflow fallback gets more epochs and a longer early-stopping patience
if USE_MERGED:
    EPOCHS = 100
    PATIENCE = 20
else:
    EPOCHS = 150
    PATIENCE = 30

IMG_SIZE = 640

# Keyword arguments later passed to model.train(**training_args)
training_args = {
    'data': str(config_path),
    'epochs': EPOCHS,
    'batch': BATCH_SIZE,
    'imgsz': IMG_SIZE,
    'patience': PATIENCE,
    'save': True,
    'save_period': 20,
    'device': DEVICE,
    'workers': WORKERS,
    'project': str(paths['runs_dir']),
    'name': 'collar_detector',
    'exist_ok': True,
    'pretrained': True,
    'optimizer': 'AdamW',
    'lr0': 0.001,
    'lrf': 0.01,
    'weight_decay': 0.0005,
    'amp': True,  # FP16 mixed precision for speed
    
    # Cache the dataset in RAM if available (speeds up training)
    # 'cache': 'ram',  # Uncomment if you have 64GB+ RAM
    
    # Augmentation (less aggressive for the large dataset)
    'hsv_h': 0.02,
    'hsv_s': 0.7 if USE_MERGED else 0.8,
    'hsv_v': 0.4 if USE_MERGED else 0.5,
    'degrees': 15 if USE_MERGED else 20,
    'translate': 0.1 if USE_MERGED else 0.15,
    'scale': 0.5 if USE_MERGED else 0.6,
    'shear': 5 if USE_MERGED else 10,
    'perspective': 0.0005,
    'flipud': 0.2 if USE_MERGED else 0.3,
    'fliplr': 0.5,
    'mosaic': 1.0,
    'mixup': 0.1 if USE_MERGED else 0.2,
    'copy_paste': 0.05 if USE_MERGED else 0.1,
}

# Print a summary of the chosen configuration
print(f"\n{'='*60}")
print(f"CONFIGURAZIONE TRAINING")
print(f"{'='*60}")
print(f"Dataset:      {stats['train']} train, {stats['valid']} val")
print(f"Device:       {DEVICE} ({NUM_GPUS} GPU)")
print(f"Batch size:   {BATCH_SIZE}")
print(f"Epochs:       {EPOCHS}")
print(f"Image size:   {IMG_SIZE}")
print(f"Workers:      {WORKERS}")
print(f"Mixed Prec:   {training_args['amp']}")
print(f"{'='*60}")

In [None]:
# TRAINING
print("=" * 60, "INIZIO TRAINING COLLAR DETECTOR", "=" * 60, sep="\n")
print(f"Timestamp: {datetime.now():%Y-%m-%d %H:%M:%S}")
print(f"Dataset: {stats['train']} train, {stats['valid']} val")
print()

# Run the training with the arguments collected in training_args
results = model.train(**training_args)

print("\n" + "=" * 60, "TRAINING COMPLETATO!", "=" * 60, sep="\n")

## 5. Valutazione Modello

In [None]:
# Load the best checkpoint of the run, falling back to the in-memory model
results_dir = paths['runs_dir'] / 'collar_detector'
best_model_path = results_dir / 'weights' / 'best.pt'

print(f"Cercando modello in: {best_model_path}")

if not best_model_path.exists():
    # Try the alternative location under the project directory
    alt_path = paths['project_dir'] / 'training' / 'runs' / 'collar_detector' / 'weights' / 'best.pt'
    if not alt_path.exists():
        print(f"‚ö†Ô∏è  Best model non trovato in:")
        print(f"   - {best_model_path}")
        print(f"   - {alt_path}")
        print("Usando il modello corrente...")
        best_model = model
    else:
        best_model_path = alt_path
        best_model = YOLO(str(best_model_path))
        print(f"‚úì Best model caricato da path alternativo: {best_model_path}")
else:
    best_model = YOLO(str(best_model_path))
    print(f"‚úì Best model caricato da: {best_model_path}")

In [None]:
# Evaluate the selected model on the validation split of the training config
print("Valutazione su validation set...")
val_results = best_model.val(data=str(config_path))

# Report the box metrics in a single write
print(
    "\nMetriche:\n"
    f"  mAP50: {val_results.box.map50:.4f}\n"
    f"  mAP50-95: {val_results.box.map:.4f}\n"
    f"  Precision: {val_results.box.mp:.4f}\n"
    f"  Recall: {val_results.box.mr:.4f}"
)

In [None]:
# Plot the training curves stored in the run's results.csv
results_csv = results_dir / 'results.csv'

print(f"Cercando risultati in: {results_csv}")

if results_csv.exists():
    df_results = pd.read_csv(results_csv)
    # Column names are stripped of surrounding whitespace before lookup
    df_results.columns = df_results.columns.str.strip()

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # One entry per panel: (axis, title, y-label, series), where each series
    # is (csv column, legend label, colour). The first series is the primary
    # one: the panel is drawn only when that column is present.
    panels = [
        (axes[0, 0], 'Box Loss', 'Loss', [
            ('train/box_loss', 'Train', 'blue'),
            ('val/box_loss', 'Val', 'orange'),
        ]),
        (axes[0, 1], 'mAP Metrics', 'mAP', [
            ('metrics/mAP50(B)', 'mAP50', 'green'),
            ('metrics/mAP50-95(B)', 'mAP50-95', 'purple'),
        ]),
        (axes[1, 0], 'Precision & Recall', 'Score', [
            ('metrics/precision(B)', 'Precision', 'blue'),
            ('metrics/recall(B)', 'Recall', 'red'),
        ]),
        (axes[1, 1], 'Classification Loss', 'Loss', [
            ('train/cls_loss', 'Train', 'blue'),
            ('val/cls_loss', 'Val', 'orange'),
        ]),
    ]

    for ax, title, ylabel, series in panels:
        if series[0][0] not in df_results.columns:
            continue
        for column, label, color in series:
            # Every series is optional, recall included, so a missing
            # column is skipped instead of raising KeyError
            if column in df_results.columns:
                ax.plot(df_results[column], label=label, color=color)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    save_path = results_dir / 'training_curves.png'
    save_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(save_path, dpi=150)
    print(f"‚úì Salvato: {save_path}")
    plt.show()
else:
    print(f"‚ö†Ô∏è  Results CSV non trovato in {results_csv}")
    print(f"   Verifica: ls {results_dir}")

In [None]:
# Run the model on a few held-out images and show the annotated predictions
if USE_MERGED:
    test_images_dir = DATASET_DIR / 'images' / 'val'  # no test split: use val
else:
    test_images_dir = SPLIT_DIR / 'test' / 'images'

# Keep image files only (a bare '*.*' also matches files such as .DS_Store)
# and sort them so the same six images are shown on every run
test_images = sorted(
    p for p in test_images_dir.glob('*.*')
    if p.suffix.lower() in ('.jpg', '.jpeg', '.png')
)[:6]

if test_images:
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    for i, img_path in enumerate(test_images):
        # Inference; stored under its own name so the training `results`
        # object is not overwritten
        prediction = best_model(str(img_path), verbose=False)

        # Annotated image, converted from BGR to RGB for matplotlib
        annotated = prediction[0].plot()
        annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

        axes[i].imshow(annotated)
        axes[i].set_title(img_path.name, fontsize=10)
        axes[i].axis('off')

    # Hide the unused axes
    for j in range(len(test_images), 6):
        axes[j].axis('off')

    # Name the split that is actually being shown
    split_label = 'Validation' if USE_MERGED else 'Test'
    plt.suptitle(f'Predizioni su {split_label} Set', fontsize=14)
    plt.tight_layout()

    # Save next to the other run artefacts
    save_path = results_dir / 'test_predictions.png'
    save_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(save_path, dpi=150)
    print(f"‚úì Salvato: {save_path}")
    plt.show()
else:
    print("Nessuna immagine nel validation/test set")

## 6. Export Modello

In [None]:
# Copy the trained weights into the project's weights folder
final_model_path = OUTPUT_DIR / 'collar_detector.pt'

print(f"Cercando best model in: {best_model_path}")

if not best_model_path.exists():
    # No best.pt: try last.pt from the same folder
    last_model_path = best_model_path.parent / 'last.pt'
    if not last_model_path.exists():
        print("‚ö†Ô∏è  Nessun modello trovato!")
        print(f"   Verifica manualmente: {results_dir / 'weights'}")
    else:
        shutil.copy(last_model_path, final_model_path)
        print(f"‚úì Last model salvato in: {final_model_path}")
        print(f"  Dimensione: {final_model_path.stat().st_size / 1024 / 1024:.2f} MB")
else:
    shutil.copy(best_model_path, final_model_path)
    print(f"‚úì Modello salvato in: {final_model_path}")
    print(f"  Dimensione: {final_model_path.stat().st_size / 1024 / 1024:.2f} MB")

In [None]:
# Final check: reload the exported weights and run them on one image
if final_model_path.exists():
    print("Test modello esportato...")

    exported_model = YOLO(str(final_model_path))

    if test_images:
        test_result = exported_model(str(test_images[0]), verbose=False)
        detections = test_result[0].boxes

        print(f"\nTest su: {test_images[0].name}")
        print(f"Detections: {len(detections)}")

        # One line per detection: class name and confidence
        for box in detections:
            cls, conf = int(box.cls[0]), float(box.conf[0])
            cls_name = training_config['names'][cls]
            print(f"  - {cls_name}: {conf:.2%}")

In [None]:
# Helper that turns the detector output into P(no_collar)
def get_no_collar_probability(model, image_path, *, no_detection_prob=0.7):
    """
    Run the detector on one image and return P(no_collar).

    Only the detection with the highest confidence is considered.

    Args:
        model: callable invoked as ``model(image_path, verbose=False)``; the
            first element of its result must expose ``boxes``, each box with
            ``cls`` and ``conf``.
        image_path: image to analyse, passed to the model unchanged.
        no_detection_prob: value returned when the model finds nothing.
            Defaults to 0.7, i.e. "probably without collar".

    Returns:
        float: probability in [0, 1] that the dog has NO collar.
    """
    boxes = model(image_path, verbose=False)[0].boxes

    # Nothing detected: return the configured prior
    if len(boxes) == 0:
        return no_detection_prob

    top_box = max(boxes, key=lambda box: float(box.conf[0]))
    top_conf = float(top_box.conf[0])

    # Class 0 = with leash, class 1 = without leash
    if int(top_box.cls[0]) == 0:
        return 1.0 - top_conf  # confident "with leash" -> low P(no_collar)
    return top_conf  # confident "without leash" -> high P(no_collar)

# Smoke test of the helper on up to three held-out images
if test_images and final_model_path.exists():
    print("\nTest funzione get_no_collar_probability:")
    for img_path in test_images[:3]:
        print(f"  {img_path.name}: P(no_collar) = {get_no_collar_probability(exported_model, str(img_path)):.2%}")

In [None]:
# Final summary of dataset, training settings, metrics and exported weights
print("\n" + "=" * 60, "RIEPILOGO TRAINING COLLAR DETECTOR", "=" * 60, sep="\n")

print(f"\nDataset: {'Merged (Piattaforma Labeling)' if USE_MERGED else 'Roboflow Dog with Leash'}")
print(f"   Path: {DATASET_DIR}")
print(f"   - Train: {stats.get('train', 0)} images")
print(f"   - Valid: {stats.get('valid', 0)} images")
if not USE_MERGED:
    # Only the Roboflow split has a test subset
    print(f"   - Test:  {stats.get('test', 0)} images")

print("\nTraining:")
print("   - Model: YOLOv8n")
print(f"   - Epochs: {EPOCHS}")
print(f"   - Batch size: {BATCH_SIZE}")
print(f"   - Image size: {IMG_SIZE}")

print("\nRisultati:")
print(f"   - mAP50: {val_results.box.map50:.4f}")
print(f"   - mAP50-95: {val_results.box.map:.4f}")
print(f"   - Precision: {val_results.box.mp:.4f}")
print(f"   - Recall: {val_results.box.mr:.4f}")

print(f"\nModello salvato: {final_model_path}")
if final_model_path.exists():
    print(f"   Dimensione: {final_model_path.stat().st_size / 1024 / 1024:.2f} MB")

# Usage hint
print("\n" + "=" * 60)
print("Per usare il modello:")
print("  from ultralytics import YOLO")
print(f"  model = YOLO('{final_model_path}')")
print("=" * 60)