In [1]:
# ============================================================================
# CELLA 1: Setup Progetto (usando struttura esistente)
# ============================================================================

from google.colab import drive
import sys
import os
from pathlib import Path

print(" AML Semantic Correspondence - Training-Free Baseline\n")

# 1. Mount Google Drive (se non già montato)
if not Path('/content/drive').exists():
    drive.mount('/content/drive')
    print(" Google Drive mounted\n")
else:
    print(" Google Drive already mounted\n")

# 2. Usa la tua struttura esistente
PROJECT_ROOT = '/content/drive/MyDrive/AML'
DATA_DIR = f'{PROJECT_ROOT}/dataset' # Reverted to 'dataset' as per original context
CHECKPOINT_DIR = f'{PROJECT_ROOT}/checkpoints'
RESULTS_DIR = f'{PROJECT_ROOT}/results'

# Ensure these directories exist (they will be created inside MyDrive/AML)
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# 3. Clone/Copy repository files
GITHUB_REPO_URL = 'https://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@github.com/SamueleCarrea/AML_SemanticCorrespondence'
LOCAL_REPO_NAME = 'AML_SemanticCorrespondence'

if not Path(LOCAL_REPO_NAME).exists():
    print(f"\n Cloning repository {GITHUB_REPO_URL} into {LOCAL_REPO_NAME}...")
    !git clone {GITHUB_REPO_URL} {LOCAL_REPO_NAME}
    print(" Repository cloned")
else:
    print(f"\n Repository {LOCAL_REPO_NAME} already exists.")
    # Check if it's a git repo before trying to pull
    if Path(LOCAL_REPO_NAME, '.git').exists():
        print(" Pulling latest changes...")
        %cd {LOCAL_REPO_NAME}
        !git pull
        %cd ..
        print(" Repository updated")
    else:
        print(" Directory exists but is not a Git repository. Skipping pull.")

# Aggiungi al path la directory locale del repository
sys.path.insert(0, LOCAL_REPO_NAME)

# 4. Verifica GPU
import torch
print(f"\n  GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No GPU'}")
if torch.cuda.is_available():
    print(f"   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

print("\n Setup complete!\n")

 AML Semantic Correspondence - Training-Free Baseline

Mounted at /content/drive
 Google Drive mounted


 Cloning repository https://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@github.com/SamueleCarrea/AML_SemanticCorrespondence into AML_SemanticCorrespondence...
Cloning into 'AML_SemanticCorrespondence'...
remote: Enumerating objects: 264, done.[K
remote: Counting objects: 100% (61/61), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 264 (delta 29), reused 40 (delta 13), pack-reused 203 (from 1)[K
Receiving objects: 100% (264/264), 131.04 KiB | 16.38 MiB/s, done.
Resolving deltas: 100% (107/107), done.
 Repository cloned

  GPU: Tesla T4
   VRAM: 15.8 GB

 Setup complete!



In [2]:
%cd {LOCAL_REPO_NAME}
!git pull
!git checkout eval
%cd ..

/content/AML_SemanticCorrespondence
Already up to date.
Branch 'eval' set up to track remote branch 'eval' from 'origin'.
Switched to a new branch 'eval'
/content


In [3]:
# ============================================================================
# CELLA 2: Install Dependencies
# ============================================================================

print(" Installing dependencies...\n")

# Installa da requirements.txt clonato
!pip install -q -r {LOCAL_REPO_NAME}/requirements.txt

# Verifica veloce
import torch
print(f"\n PyTorch {torch.__version__}")
print(f" CUDA available: {torch.cuda.is_available()}")

print(" Dependencies installed!\n")

 Installing dependencies...

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.4/86.4 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m40.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.5/154.5 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.0/55.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h
 PyTorch 2.9.0+cu126
 CUDA available: True
 Dependencies installed!



In [4]:
# ============================================================================
# CELLA 3: Load SPair-71k Dataset
# ============================================================================

from dataset.spair import SPairDataset
from torch.utils.data import DataLoader
from pathlib import Path

SPAIR_ROOT = f'{DATA_DIR}/Spair-71k'

if not Path(SPAIR_ROOT).exists():
    raise FileNotFoundError(f"Dataset not found: {SPAIR_ROOT}")

# Load dataset
test_dataset = SPairDataset(
    root=SPAIR_ROOT,
    split='test',
    size='large',
    long_side=518,
    normalize=True,
    load_segmentation=False
)

# DataLoader
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    pin_memory=torch.cuda.is_available()
)

# Sanity check - Mostra tutte le chiavi disponibili
sample = test_dataset[0]
print(f" Loaded {len(test_dataset)} pairs from test split (large)")
print(f" Sample pair: {sample['pair_id']}")
print(f" Image shapes: src={sample['src_img'].shape}, tgt={sample['tgt_img'].shape}")
print(f" Keypoints: {len(sample['src_kps'])} correspondences")
print(f"  Category: {sample['category']}")
print(f"\n Available keys in sample:")
for key in sample.keys():
    if isinstance(sample[key], torch.Tensor):
        print(f"   - {key}: {sample[key].shape} ({sample[key].dtype})")
    else:
        print(f"   - {key}: {sample[key]}")

✅ Loaded 12234 pairs from test split (large)
 Loaded 12234 pairs from test split (large)
 Sample pair: aeroplane:2008_002719-2008_004100
 Image shapes: src=torch.Size([3, 345, 518]), tgt=torch.Size([3, 344, 518])
 Keypoints: 3 correspondences
  Category: aeroplane

 Available keys in sample:
   - src_img: torch.Size([3, 345, 518]) (torch.float32)
   - tgt_img: torch.Size([3, 344, 518]) (torch.float32)
   - src_kps: torch.Size([3, 2]) (torch.float32)
   - tgt_kps: torch.Size([3, 2]) (torch.float32)
   - valid_mask: torch.Size([3]) (torch.bool)
   - category: aeroplane
   - pair_id: aeroplane:2008_002719-2008_004100
   - src_scale: torch.Size([]) (torch.float32)
   - tgt_scale: torch.Size([]) (torch.float32)
   - src_orig_size: torch.Size([2]) (torch.int64)
   - tgt_orig_size: torch.Size([2]) (torch.int64)
   - src_bbox: torch.Size([4]) (torch.float32)
   - tgt_bbox: torch.Size([4]) (torch.float32)


In [5]:
# ============================================================================
# CELLA 4: Unified Backbone Registry & Factory
# ============================================================================

import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, Tuple
from dataclasses import dataclass

# Import extractors from backbones.py (no circular import now!)
from models.backbones import DINOv2Extractor, DINOv3Extractor, SAMImageEncoder


@dataclass
class BackboneConfig:
    """Configuration for a vision backbone."""
    name: str
    patch_size: int
    embed_dim: int
    hub_name: str
    type: str


# Registry
BACKBONE_REGISTRY = {
    'dinov2_vitb14': BackboneConfig('DINOv2-ViT-B/14', 14, 768, 'dinov2_vitb14', 'dinov2'),
    'dinov3_vitb16': BackboneConfig('DINOv3-ViT-B/16', 16, 768, 'dinov3_vitb16', 'dinov3'),
    'sam_vit_b': BackboneConfig('SAM-ViT-B', 16, 768, 'vit_b', 'sam'),
}


class UnifiedBackbone(nn.Module):
    """Unified interface per tutti i backbones."""

    def __init__(self, backbone_name: str, device: str = 'cuda'):
        super().__init__()

        if backbone_name not in BACKBONE_REGISTRY:
            raise ValueError(f"Unknown backbone: {backbone_name}")

        self.config = BACKBONE_REGISTRY[backbone_name]
        self.device = device

        print(f" Loading {self.config.name}...")

        # Load extractor based on type
        if self.config.type == 'dinov2':
            self.extractor = DINOv2Extractor(
                variant=self.config.hub_name,
                device=device,
                allow_hub_download=True
            )
        elif self.config.type == 'dinov3':
            self.extractor = DINOv3Extractor(
                variant=self.config.hub_name,
                device=device,
                allow_hub_download=True
            )
        elif self.config.type == 'sam':
            self.extractor = SAMImageEncoder(
                variant=self.config.hub_name,
                device=device,
                allow_hub_download=True
            )
        else:
            raise NotImplementedError(f"Type {self.config.type} not implemented")

        print(f" {self.config.name} loaded")
        print(f"   Patch size: {self.extractor.stride}")
        print(f"   Embedding: {self.config.embed_dim}D\n")

    @torch.no_grad()
    def extract_features(self, image: torch.Tensor) -> torch.Tensor:
        """Extract dense features.

        Args:
            image: (B, 3, H, W)
        Returns:
            features: (B, H_patches, W_patches, D)
        """
        # Use extract_feats from backbones.py (handles padding automatically)
        feat_map, stride = self.extractor.extract_feats(image)

        # feat_map is (B, C, H_feat, W_feat)
        # Convert to (B, H_feat, W_feat, C)
        features = feat_map.permute(0, 2, 3, 1)

        return features


# Test registry
print(" Available Backbones:")
print("=" * 60)
for name, config in BACKBONE_REGISTRY.items():
    print(f"   {name:20s} → {config.name}")
print("=" * 60)
print()

 Available Backbones:
   dinov2_vitb14        → DINOv2-ViT-B/14
   dinov3_vitb16        → DINOv3-ViT-B/16
   sam_vit_b            → SAM-ViT-B



In [6]:
# ============================================================================
# CELLA 5: Unified Correspondence Matcher
# ============================================================================

class CorrespondenceMatcher:
    """Semantic correspondence matcher (training-free baseline)."""

    def __init__(self, backbone: UnifiedBackbone):
        self.backbone = backbone
        self.device = backbone.device

    @torch.no_grad()
    def match(
        self,
        src_img: torch.Tensor,
        tgt_img: torch.Tensor,
        src_kps: torch.Tensor
    ) -> torch.Tensor:
        """
        Find correspondences for source keypoints.

        Args:
            src_img: (1, 3, H, W)
            tgt_img: (1, 3, H, W)
            src_kps: (N, 2) in pixel coords (x, y)

        Returns:
            tgt_kps_pred: (N, 2) predicted target keypoints
        """
        # Extract features
        src_feat = self.backbone.extract_features(src_img)[0]  # (H_s, W_s, D)
        tgt_feat = self.backbone.extract_features(tgt_img)[0]  # (H_t, W_t, D)

        H_s, W_s, D = src_feat.shape
        H_t, W_t, _ = tgt_feat.shape

        patch_size = self.backbone.config.patch_size

        # Convert keypoint coords to patch indices
        src_kps_patch = (src_kps / patch_size).long()
        src_kps_patch[:, 0] = src_kps_patch[:, 0].clamp(0, W_s - 1)
        src_kps_patch[:, 1] = src_kps_patch[:, 1].clamp(0, H_s - 1)

        # Match each keypoint
        N = src_kps.shape[0]
        tgt_kps_pred = torch.zeros(N, 2, device=src_kps.device)

        for i in range(N):
            x, y = src_kps_patch[i]
            src_vec = src_feat[y, x]  # (D,)

            # Cosine similarity
            similarity = F.cosine_similarity(
                src_vec.view(1, 1, 1, D),
                tgt_feat.unsqueeze(0),
                dim=-1
            ).squeeze(0)  # (H_t, W_t)

            # Argmax
            max_idx = similarity.flatten().argmax()
            pred_y = max_idx // W_t
            pred_x = max_idx % W_t

            # Convert back to pixels
            tgt_kps_pred[i, 0] = pred_x * patch_size + patch_size // 2
            tgt_kps_pred[i, 1] = pred_y * patch_size + patch_size // 2

        return tgt_kps_pred


print(" Unified matcher ready!")

 Unified matcher ready!


In [7]:
# ============================================================================
# CELLA 6: PCK Metrics Implementation
# ============================================================================

import torch
import numpy as np
from typing import Dict, List

def compute_pck(
    pred_kps: torch.Tensor,
    gt_kps: torch.Tensor,
    image_size: tuple,
    thresholds: List[float] = [0.05, 0.10, 0.15, 0.20]
) -> Dict[str, float]:
    """
    Compute Percentage of Correct Keypoints (PCK) at multiple thresholds.

    Args:
        pred_kps: (N, 2) predicted keypoints in (x, y) format
        gt_kps: (N, 2) ground truth keypoints in (x, y) format
        image_size: (H, W) image dimensions
        thresholds: List of normalized distance thresholds

    Returns:
        Dictionary with PCK@T for each threshold
    """
    H, W = image_size

    # Use max dimension for normalization (standard in SPair-71k)
    max_dim = max(H, W)
    # =======================could be error to calculate euclidian distance =======================
    # Compute Euclidean distance
    distances = torch.norm(pred_kps - gt_kps, dim=1)  # (N,)

    # Normalize by image size
    normalized_distances = distances / max_dim

    # Compute PCK for each threshold
    results = {}
    for t in thresholds:
        correct = (normalized_distances <= t).float()
        pck = correct.mean().item()
        results[f'PCK@{t:.2f}'] = pck

    return results


def compute_pck_per_keypoint(
    pred_kps: torch.Tensor,
    gt_kps: torch.Tensor,
    image_size: tuple,
    thresholds: List[float] = [0.05, 0.10, 0.15, 0.20]
) -> Dict[int, Dict[str, float]]:
    """
    Compute PCK per individual keypoint.

    Returns:
        Dictionary mapping keypoint_id -> {PCK@T metrics}
    """
    H, W = image_size
    max_dim = max(H, W)

    N = pred_kps.shape[0]
    distances = torch.norm(pred_kps - gt_kps, dim=1) / max_dim

    results = {}
    for i in range(N):
        kp_results = {}
        for t in thresholds:
            correct = (distances[i] <= t).float().item()
            kp_results[f'PCK@{t:.2f}'] = correct
        results[i] = kp_results

    return results


# Test metrics
print(" Testing PCK metrics...\n")

# Dummy data
pred = torch.tensor([[100.0, 150.0], [200.0, 250.0], [50.0, 75.0]])
gt = torch.tensor([[105.0, 155.0], [195.0, 245.0], [48.0, 72.0]])
img_size = (480, 640)

pck_results = compute_pck(pred, gt, img_size)

print(" PCK Results (overall):")
for metric, value in pck_results.items():
    print(f"   {metric}: {value:.4f} ({value*100:.2f}%)")

print("\n Metrics implementation working!")

 Testing PCK metrics...

 PCK Results (overall):
   PCK@0.05: 1.0000 (100.00%)
   PCK@0.10: 1.0000 (100.00%)
   PCK@0.15: 1.0000 (100.00%)
   PCK@0.20: 1.0000 (100.00%)

 Metrics implementation working!


In [8]:
# ============================================================================
# CELLA 7: Unified Evaluation Engine
# ============================================================================

from tqdm import tqdm
from collections import defaultdict
import time
import pandas as pd

class UnifiedEvaluator:
    """Evaluation engine per tutti i backbones."""

    def __init__(
        self,
        dataloader,
        device: str = 'cuda',
        thresholds: list = [0.05, 0.10, 0.15, 0.20]
    ):
        self.dataloader = dataloader
        self.device = device
        self.thresholds = thresholds
        self.results = {}

    def evaluate_backbone(
        self,
        backbone_name: str,
        num_samples: int = None
    ) -> dict:
        """
        Evaluate a single backbone.

        Args:
            backbone_name: Name from BACKBONE_REGISTRY
            num_samples: Max number of samples (None = all)
        """
        print(f"\n{'='*70}")
        print(f"EVALUATING: {BACKBONE_REGISTRY[backbone_name].name}")
        print('='*70)

        # Initialize
        backbone = UnifiedBackbone(backbone_name, device=self.device)
        matcher = CorrespondenceMatcher(backbone)

        # Storage
        all_pck = defaultdict(list)
        per_category = defaultdict(lambda: defaultdict(list))
        inference_times = []

        # Evaluation loop
        n_processed = 0
        pbar = tqdm(self.dataloader, desc=f"{backbone_name}")

        for batch in pbar:
            if num_samples and n_processed >= num_samples:
                break

            # Extract data
            src_img = batch['src_img'].to(self.device)
            tgt_img = batch['tgt_img'].to(self.device)
            src_kps = batch['src_kps'][0]  # (N, 2)
            tgt_kps = batch['tgt_kps'][0]
            valid_mask = batch['valid_mask'][0]
            category = batch['category'][0]

            # Filter valid keypoints
            src_kps_valid = src_kps[valid_mask]
            tgt_kps_valid = tgt_kps[valid_mask]

            if len(src_kps_valid) == 0:
                continue

            # Predict (with timing)
            start = time.time()
            tgt_kps_pred = matcher.match(src_img, tgt_img, src_kps_valid)
            inference_times.append(time.time() - start)

            # Compute metrics
            H, W = tgt_img.shape[2:]
            pck_scores = compute_pck(tgt_kps_pred, tgt_kps_valid, (H, W), self.thresholds)

            # Store
            for metric, value in pck_scores.items():
                all_pck[metric].append(value)
                per_category[category][metric].append(value)

            n_processed += 1

            # Update progress bar
            if len(all_pck['PCK@0.10']) > 0:
                avg_pck = np.mean(all_pck['PCK@0.10'])
                pbar.set_postfix({'PCK@0.10': f'{avg_pck:.4f}'})

        # Aggregate results
        results = self._aggregate_results(
            backbone_name, all_pck, per_category, inference_times, n_processed
        )

        self.results[backbone_name] = results
        self._print_summary(results)

        # Cleanup
        del backbone, matcher
        torch.cuda.empty_cache()

        return results

    def _aggregate_results(self, backbone_name, all_pck, per_category, times, n_pairs):
        """Aggregate all metrics."""

        results = {
            'backbone': backbone_name,
            'display_name': BACKBONE_REGISTRY[backbone_name].name,
            'num_pairs': n_pairs,
            'inference_time_ms': np.mean(times) * 1000,
            'overall': {},
            'per_category': {}
        }

        # Overall
        for metric in [f'PCK@{t:.2f}' for t in self.thresholds]:
            values = all_pck[metric]
            results['overall'][metric] = {
                'mean': np.mean(values),
                'std': np.std(values),
            }

        # Per-category
        for cat, metrics in per_category.items():
            results['per_category'][cat] = {}
            for metric in [f'PCK@{t:.2f}' for t in self.thresholds]:
                results['per_category'][cat][metric] = np.mean(metrics[metric])

        return results

    def _print_summary(self, results):
        """Print evaluation summary."""

        print(f"\n {results['display_name']} Results:")
        print("-" * 70)

        for metric, values in results['overall'].items():
            print(f"   {metric}: {values['mean']:.4f} ± {values['std']:.4f}")

        print(f"\n⏱  Avg inference time: {results['inference_time_ms']:.2f} ms/pair")
        print(f" Evaluated on {results['num_pairs']} pairs")

    def compare_all(self) -> pd.DataFrame:
        """Create comparison table."""

        rows = []
        for name, res in self.results.items():
            row = {
                'Backbone': res['display_name'],
                'Pairs': res['num_pairs'],
                'Time (ms)': f"{res['inference_time_ms']:.1f}",
            }

            for metric, vals in res['overall'].items():
                row[metric] = f"{vals['mean']:.4f}"

            rows.append(row)

        df = pd.DataFrame(rows)

        print("\n" + "="*70)
        print("FINAL COMPARISON")
        print("="*70)
        print(df.to_string(index=False))
        print("="*70)

        return df


#Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Initialize evaluator
evaluator = UnifiedEvaluator(
    dataloader=test_loader,
    device=device,
    thresholds=[0.05, 0.10, 0.15, 0.20]
)

print("\n Unified evaluator ready!")

Using device: cuda

 Unified evaluator ready!


In [None]:
# ============================================================================
# CELLA 8A: Evaluate DINOv2-ViT-B/14
# ============================================================================

import json
import pandas as pd

# Configuration
BACKBONE_NAME = 'dinov2_vitb14'
USE_SUBSET = False
NUM_SAMPLES = 100 if USE_SUBSET else None

print(f" Evaluating: {BACKBONE_REGISTRY[BACKBONE_NAME].name}")
print(f"   Samples: {NUM_SAMPLES if NUM_SAMPLES else 'ALL (1814)'}")
print(f"   Device: {device}\n")

# Evaluate
try:
    results_dinov2 = evaluator.evaluate_backbone(BACKBONE_NAME, num_samples=NUM_SAMPLES)

    # Save results immediately
    output_file = f'{RESULTS_DIR}/dinov2_vitb14_results.json'
    with open(output_file, 'w') as f:
        json.dump(results_dinov2, f, indent=2)

    print(f"\n Results saved: {output_file}")

    # Show summary
    print(f"\n DINOv2 Summary:")
    print(f"   PCK@0.10: {results_dinov2['overall']['PCK@0.10']['mean']:.4f}")
    print(f"   Time: {results_dinov2['inference_time_ms']:.2f} ms/pair")

except Exception as e:
    print(f"\n Error: {e}")
    import traceback
    traceback.print_exc()

print("\n DINOv2 evaluation complete!")

 Evaluating: DINOv2-ViT-B/14
   Samples: ALL (1814)
   Device: cuda


EVALUATING: DINOv2-ViT-B/14
 Loading DINOv2-ViT-B/14...
Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip




Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vitb14_pretrain.pth


100%|██████████| 330M/330M [00:01<00:00, 188MB/s]


 DINOv2-ViT-B/14 loaded
   Patch size: 14
   Embedding: 768D



dinov2_vitb14:   4%|▎         | 441/12234 [03:48<1:36:24,  2.04it/s, PCK@0.10=0.7019]

In [None]:
# ============================================================================
# CELLA 8B: Evaluate DINOv3-ViT-B/14 (con fallback a DINOv2)
# ============================================================================

import json

# Configuration
BACKBONE_NAME = 'dinov3_vitb14'
USE_SUBSET = True
NUM_SAMPLES = 100 if USE_SUBSET else None

print(f" Evaluating: {BACKBONE_REGISTRY[BACKBONE_NAME].name}")
print(f"   Samples: {NUM_SAMPLES if NUM_SAMPLES else 'ALL (1814)'}")
print(f"   Device: {device}\n")

# Evaluate
try:
    results_dinov3 = evaluator.evaluate_backbone(BACKBONE_NAME, num_samples=NUM_SAMPLES)

    # Save results
    output_file = f'{RESULTS_DIR}/dinov3_vitb14_results.json'
    with open(output_file, 'w') as f:
        json.dump(results_dinov3, f, indent=2)

    print(f"\n Results saved: {output_file}")

    # Show summary
    print(f"\n DINOv3 Summary:")
    print(f"   PCK@0.10: {results_dinov3['overall']['PCK@0.10']['mean']:.4f}")
    print(f"   Time: {results_dinov3['inference_time_ms']:.2f} ms/pair")

except Exception as e:
    print(f"\n Error: {e}")
    import traceback
    traceback.print_exc()

print("\n DINOv3 evaluation complete!")

In [None]:
# ============================================================================
# CELLA 8C: Evaluate SAM-ViT-B (OPZIONALE)
# ============================================================================

import json

# NOTA: Richiede installazione:
# !pip install git+https://github.com/facebookresearch/segment-anything.git

# Configuration
BACKBONE_NAME = 'sam_vit_b'
USE_SUBSET = True
NUM_SAMPLES = 100 if USE_SUBSET else None

print(f" Evaluating: {BACKBONE_REGISTRY[BACKBONE_NAME].name}")
print(f"   Samples: {NUM_SAMPLES if NUM_SAMPLES else 'ALL (1814)'}")
print(f"   Device: {device}\n")

# Check if SAM is installed
try:
    import segment_anything
    print(" segment-anything package found\n")
except ImportError:
    print(" SAM not installed. Installing now...")
    !pip install -q git+https://github.com/facebookresearch/segment-anything.git
    print(" Installation complete\n")

# Evaluate
try:
    results_sam = evaluator.evaluate_backbone(BACKBONE_NAME, num_samples=NUM_SAMPLES)

    # Save results
    output_file = f'{RESULTS_DIR}/sam_vit_b_results.json'
    with open(output_file, 'w') as f:
        json.dump(results_sam, f, indent=2)

    print(f"\n Results saved: {output_file}")

    # Show summary
    print(f"\n SAM Summary:")
    print(f"   PCK@0.10: {results_sam['overall']['PCK@0.10']['mean']:.4f}")
    print(f"   Time: {results_sam['inference_time_ms']:.2f} ms/pair")

except Exception as e:
    print(f"\n Error: {e}")
    print(" Tip: SAM might not work well for semantic correspondence")
    import traceback
    traceback.print_exc()

print("\n SAM evaluation complete (or skipped)!")

In [None]:
# ============================================================================
# CELLA 9: Load All Results & Final Comparison
# ============================================================================

import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

print(" Loading all saved results...\n")

# Load all saved results
all_results = {}

result_files = {
    'dinov2_vitb14': f'{RESULTS_DIR}/dinov2_vitb14_results.json',
    'dinov3_vitb14': f'{RESULTS_DIR}/dinov3_vitb14_results.json',
    'sam_vit_b': f'{RESULTS_DIR}/sam_vit_b_results.json',
}

for backbone_name, filepath in result_files.items():
    if Path(filepath).exists():
        with open(filepath, 'r') as f:
            all_results[backbone_name] = json.load(f)
        print(f" Loaded: {backbone_name}")
    else:
        print(f"  Not found: {filepath}")

print(f"\n Loaded {len(all_results)} backbone results\n")

# ============================================================================
# Create Comparison Table
# ============================================================================

def create_comparison_table(results_dict):
    """Create comparison DataFrame."""
    rows = []

    for name, res in results_dict.items():
        row = {
            'Backbone': res['display_name'],
            'Pairs': res['num_pairs'],
            'Time (ms)': f"{res['inference_time_ms']:.1f}",
        }

        for metric, vals in res['overall'].items():
            row[metric] = f"{vals['mean']:.4f}"

        rows.append(row)

    return pd.DataFrame(rows)

comparison_df = create_comparison_table(all_results)

print("=" * 80)
print("FINAL COMPARISON TABLE")
print("=" * 80)
print(comparison_df.to_string(index=False))
print("=" * 80)

# Save CSV
csv_file = f'{RESULTS_DIR}/final_comparison.csv'
comparison_df.to_csv(csv_file, index=False)
print(f"\n Saved: {csv_file}")

# ============================================================================
# Visualizations
# ============================================================================

def plot_comparison(results_dict):
    """Generate comparison plots."""

    if len(results_dict) == 0:
        print("  No results to plot")
        return

    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    backbones = list(results_dict.keys())
    display_names = [results_dict[b]['display_name'] for b in backbones]
    thresholds = [0.05, 0.10, 0.15, 0.20]
    colors = sns.color_palette('husl', len(backbones))

    # Plot 1: PCK Curves
    ax1 = axes[0]
    for backbone, color in zip(backbones, colors):
        res = results_dict[backbone]
        pck_vals = [res['overall'][f'PCK@{t:.2f}']['mean'] for t in thresholds]
        ax1.plot(thresholds, pck_vals, marker='o', linewidth=2,
                label=res['display_name'], markersize=8, color=color)

    ax1.set_xlabel('Threshold', fontsize=12, fontweight='bold')
    ax1.set_ylabel('PCK', fontsize=12, fontweight='bold')
    ax1.set_title('PCK Curves', fontsize=14, fontweight='bold')
    ax1.legend(loc='lower right')
    ax1.grid(True, alpha=0.3)
    ax1.set_ylim([0, 1])

    # Plot 2: PCK@0.10 Bar Chart
    ax2 = axes[1]
    pck_010 = [results_dict[b]['overall']['PCK@0.10']['mean'] for b in backbones]

    bars = ax2.bar(range(len(backbones)), pck_010, color=colors, alpha=0.8)
    ax2.set_xticks(range(len(backbones)))
    ax2.set_xticklabels(display_names, rotation=45, ha='right')
    ax2.set_ylabel('PCK@0.10', fontsize=12, fontweight='bold')
    ax2.set_title('Accuracy Comparison', fontsize=14, fontweight='bold')
    ax2.set_ylim([0, 1])
    ax2.grid(axis='y', alpha=0.3)

    for bar, val in zip(bars, pck_010):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{val:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

    # Plot 3: Inference Time
    ax3 = axes[2]
    times = [results_dict[b]['inference_time_ms'] for b in backbones]
    bars = ax3.bar(range(len(backbones)), times, color=colors, alpha=0.8)
    ax3.set_xticks(range(len(backbones)))
    ax3.set_xticklabels(display_names, rotation=45, ha='right')
    ax3.set_ylabel('Time (ms)', fontsize=12, fontweight='bold')
    ax3.set_title('Inference Speed', fontsize=14, fontweight='bold')
    ax3.grid(axis='y', alpha=0.3)

    for bar, val in zip(bars, times):
        ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(times)*0.02,
                f'{val:.1f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

    plt.tight_layout()

    # Save
    save_path = f'{FIGURES_DIR}/final_comparison.png'
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"\n Saved: {save_path}")

    plt.show()

# Generate plots
plot_comparison(all_results)

print("\n Comparison complete!")