In [1]:
# ============================================================
# Task 1 — DINOv3 (FROZEN) — SPair-71k TEST
# Baseline protocol (DIFT-style):
# 1) Dense features from frozen backbone
# 2) For each source keypoint: cosine sim vs ALL target patches
# 3) Argmax -> predicted patch; output pixel = patch center
# 4) Evaluate PCK@{0.05, 0.10, 0.20}
#    - norm = max(width,height) of TARGET bbox (match DINOv2 baseline)
#
# OUTPUT:
# - Prints GLOBAL PCK (per-image mean + per-keypoint)
# - Prints PER-CATEGORY PCK (per-keypoint + per-image mean)
#
# Notes:
# - NO CSV saving
# - T4-safe: chunked similarity + explicit cache clearing
# - Runs 3 configs: LastLayer, InterLayer_10, FusionMean_last4(8,9,10,11)
# - Uses DataLoader correctly (batch_size=1 + custom collate_fn)
# ============================================================

In [2]:
from google.colab import drive
drive.mount("/content/drive")

from pathlib import Path
import os, json, math, time
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# ----------------------------
# Paths
# ----------------------------
# Root folder of the SPair-71k dataset on the mounted Google Drive.
SPAIR_ROOT = Path("/content/drive/MyDrive/AMLDataset/SPair-71k")
# Sub-folders consumed by SPairDataset: per-pair JSON annotations,
# split layout lists (<size>/<split>.txt) and the JPEG images.
PAIR_ANN_PATH = SPAIR_ROOT / "PairAnnotation"
LAYOUT_PATH   = SPAIR_ROOT / "Layout"
IMAGE_PATH    = SPAIR_ROOT / "JPEGImages"
# Fail fast when the dataset is missing (messages are in Italian:
# "SPair-71k not found" / "SPair folders missing").
assert SPAIR_ROOT.exists(), f"SPair-71k non trovato: {SPAIR_ROOT}"
assert PAIR_ANN_PATH.exists() and LAYOUT_PATH.exists() and IMAGE_PATH.exists(), "Cartelle SPair mancanti"


Mounted at /content/drive


In [3]:
# ----------------------------
# Dataset
# ----------------------------
def read_img(image_path: str) -> torch.Tensor:
    """Load an image from disk as an RGB tensor.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 tensor in CHW layout with values in [0..255].
    """
    # The context manager releases the file handle deterministically; np.array
    # copies the decoded pixels, so the data stays valid after the file closes.
    with Image.open(image_path) as pil_img:
        img = np.array(pil_img.convert("RGB"))
    # HWC -> CHW, then uint8 -> float32 (values are NOT rescaled here).
    return torch.from_numpy(img.transpose(2, 0, 1)).float()

class SPairDataset(Dataset):
    """Map-style dataset over SPair-71k image pairs.

    The list of pairs is read from ``<layout_path>/<dataset_size>/<datatype>.txt``
    (one pair id per line, empty lines ignored). Each item is loaded lazily
    from ``<pair_ann_path>/<datatype>/<pair_id>.json`` and the two images it
    references under ``<image_path>/<category>/``.
    """

    def __init__(self, pair_ann_path, layout_path, image_path, dataset_size="large", datatype="test"):
        """Store the dataset folders (as strings) and read the pair-id list."""
        self.datatype = datatype
        self.pair_ann_path = str(pair_ann_path)
        self.layout_path   = str(layout_path)
        self.image_path    = str(image_path)

        split_list = os.path.join(self.layout_path, dataset_size, datatype + ".txt")
        with open(split_list, "r") as fh:
            raw_lines = fh.read().splitlines()
        # Keep only non-empty lines; each one is a pair id.
        self.ann_files = [line for line in raw_lines if line]

    def __len__(self):
        """Number of pairs listed in the split file."""
        return len(self.ann_files)

    def __getitem__(self, index):
        """Load one pair: ids, category, bounding boxes, images and keypoints.

        Images are returned via ``read_img``; keypoints are converted to
        float tensors exactly as stored in the annotation JSON.
        """
        pair_id = self.ann_files[index]
        json_path = os.path.join(self.pair_ann_path, self.datatype, pair_id + ".json")
        with open(json_path, "r") as fh:
            ann = json.load(fh)

        category = ann["category"]
        category_dir = os.path.join(self.image_path, category)
        src_img_path = os.path.join(category_dir, ann["src_imname"])
        trg_img_path = os.path.join(category_dir, ann["trg_imname"])

        item = {
            "pair_id": pair_id,
            "category": category,
            "src_bbox": ann["src_bndbox"],
            "trg_bbox": ann["trg_bndbox"],
        }
        item["src_img"] = read_img(src_img_path)
        item["trg_img"] = read_img(trg_img_path)
        item["src_kps"] = torch.tensor(ann["src_kps"]).float()
        item["trg_kps"] = torch.tensor(ann["trg_kps"]).float()
        return item

In [4]:
# ----------------------------
# DataLoader (batch_size=1 + custom collate)
# ----------------------------
def collate_single(batch):
    """Collate function for ``batch_size=1``: return the lone sample unchanged.

    The DataLoader hands over a list with ``batch_size`` samples; since the
    samples hold variable-size images, no stacking is done and the single
    sample dict is passed through as-is.
    """
    n_samples = len(batch)
    assert n_samples == 1
    only_sample = batch[0]
    return only_sample

# Test split of SPair-71k ("large" layout).
test_dataset = SPairDataset(
    pair_ann_path=PAIR_ANN_PATH,
    layout_path=LAYOUT_PATH,
    image_path=IMAGE_PATH,
    dataset_size="large",
    datatype="test",
)
print("Test pairs:", len(test_dataset))

# If Colab gives worker issues, set NUM_WORKERS=0 and PERSISTENT=False.
NUM_WORKERS = 2
# Persistent workers are only valid when worker processes are actually used.
PERSISTENT = NUM_WORKERS > 0

# One pair per step, in dataset order; collate_single passes the sample dict through.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=collate_single,
    pin_memory=True,
    persistent_workers=PERSISTENT,
)

Test pairs: 12234


In [5]:
# ----------------------------
# Load DINOv3
# ----------------------------
%cd /content
!test -d dinov3 || git clone https://github.com/facebookresearch/dinov3.git
%cd /content/dinov3
!pip -q install einops timm opencv-python torchmetrics fvcore iopath

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Local clone of the DINOv3 repository (used as the torch.hub source) and the
# checkpoint file stored on Google Drive.
DINOV3_DIR = "/content/dinov3"
DINOV3_WEIGHTS = "/content/drive/MyDrive/AMLDataset/dinov3_vitb16_pretrain_lvd1689m-73cec8be.pth"
# Fail fast if the checkpoint is missing (message is in Italian: "DINOv3 weights not found").
assert os.path.exists(DINOV3_WEIGHTS), f"Pesi DINOv3 non trovati: {DINOV3_WEIGHTS}"

# Build the "dinov3_vitb16" entry point from the local repo with the given
# weights, switch it to eval mode and move it to the selected device.
dinov3 = torch.hub.load(
    DINOV3_DIR,
    "dinov3_vitb16",
    source="local",
    weights=DINOV3_WEIGHTS,
).eval().to(device)

# Frozen backbone: no parameter requires gradients.
for p in dinov3.parameters():
    p.requires_grad_(False)

# The feature extractors below hook into `dinov3.blocks`, so it must exist.
assert hasattr(dinov3, "blocks")
N_BLOCKS = len(dinov3.blocks)
print("DINOv3 blocks:", N_BLOCKS)

/content
Cloning into 'dinov3'...
remote: Enumerating objects: 538, done.[K
remote: Counting objects: 100% (363/363), done.[K
remote: Compressing objects: 100% (264/264), done.[K
remote: Total 538 (delta 201), reused 99 (delta 99), pack-reused 175 (from 1)[K
Receiving objects: 100% (538/538), 9.88 MiB | 11.27 MiB/s, done.
Resolving deltas: 100% (223/223), done.
/content/dinov3
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m58.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Building wheel for iopath (setup.py) ... [?25l[?25hdone
Device: cud

100%|██████████| 327M/327M [00:24<00:00, 13.9MB/s]


DINOv3 blocks: 12


In [6]:
# ----------------------------
# Preprocess & geometry
# ----------------------------
# Side length (in pixels) of one patch; images are padded to a multiple of it
# and keypoints are mapped to patches by integer division with it.
PATCH = 16
# Per-channel RGB normalization constants, shaped [1,3,1,1] to broadcast over a
# batched image. NOTE(review): values match the commonly used ImageNet
# statistics — confirm they are the ones expected by the DINOv3 checkpoint.
mean = torch.tensor([0.485, 0.456, 0.406], device=device).view(1, 3, 1, 1)
std  = torch.tensor([0.229, 0.224, 0.225], device=device).view(1, 3, 1, 1)

@torch.no_grad()
def preprocess_for_model(img_chw_0_255: torch.Tensor) -> torch.Tensor:
    """Turn a CHW image in [0..255] into a normalized [1,C,H,W] model input.

    The image is scaled to [0..1], given a batch dimension, moved to the
    global ``device`` and normalized with the global ``mean`` / ``std``.
    """
    scaled = img_chw_0_255 / 255.0
    batched = scaled.unsqueeze(0)
    on_device = batched.to(device, non_blocking=True)
    centered = on_device - mean
    return centered / std

def pad_to_multiple(img_chw: torch.Tensor, k: int) -> torch.Tensor:
    """Zero-pad a CHW image on the bottom/right so H and W are multiples of ``k``.

    Padding is added only after the last row/column, so pixel coordinates of
    the original content are unchanged. When both sides are already multiples
    of ``k`` the input tensor itself is returned (no copy).
    """
    _, height, width = img_chw.shape
    target_h = int(math.ceil(height / k) * k)
    target_w = int(math.ceil(width / k) * k)
    extra_rows = target_h - height
    extra_cols = target_w - width
    if extra_rows or extra_cols:
        # F.pad order for the last two dims: (left, right, top, bottom).
        return F.pad(img_chw, (0, extra_cols, 0, extra_rows), value=0.0)
    return img_chw

def safe_tokens(out):
    """Extract the token tensor from a module output.

    If ``out`` is a tuple or list, its first element is taken. The result
    must be a 3-D tensor; anything else raises ``RuntimeError``.
    """
    candidate = out[0] if isinstance(out, (tuple, list)) else out
    looks_like_tokens = torch.is_tensor(candidate) and candidate.ndim == 3
    if looks_like_tokens:
        return candidate
    raise RuntimeError(f"Unexpected tokens output: {type(candidate)} shape={getattr(candidate,'shape',None)}")

def tokens_to_featuremap(tokens_bnc: torch.Tensor, h_grid: int, w_grid: int) -> torch.Tensor:
    """Reshape the trailing patch tokens into an L2-normalized [h_grid, w_grid, C] map.

    The leading batch dimension is squeezed away and only the last
    ``h_grid * w_grid`` tokens are kept, which drops any tokens placed before
    the patches (CLS/register). Raises ``RuntimeError`` if there are fewer
    tokens than patches.
    """
    flat_tokens = tokens_bnc.squeeze(0)  # [Ntok,C]
    n_patches = h_grid * w_grid
    n_tokens = flat_tokens.shape[0]
    if n_tokens < n_patches:
        raise RuntimeError(f"Ntok={n_tokens} < Npatch={n_patches}")
    grid = flat_tokens[-n_patches:].view(h_grid, w_grid, -1)
    return F.normalize(grid, dim=-1)

In [7]:
# ----------------------------
# Feature extractors
# ----------------------------
@torch.no_grad()
def feat_last(img_pad_chw: torch.Tensor, hg: int, wg: int) -> torch.Tensor:
    """Dense features from the model's final normalized patch tokens.

    Runs ``dinov3.forward_features`` on the preprocessed image, reads the
    ``"x_norm_patchtokens"`` entry and returns it as an L2-normalized
    [hg, wg, C] map. Raises ``RuntimeError`` when the token count does not
    equal ``hg * wg``.
    """
    model_out = dinov3.forward_features(preprocess_for_model(img_pad_chw))
    patch_tokens = model_out["x_norm_patchtokens"].squeeze(0)  # [N,C]
    n_tokens = patch_tokens.shape[0]
    n_expected = hg * wg
    if n_tokens != n_expected:
        raise RuntimeError(f"Patch tokens N={n_tokens} != hg*wg={n_expected}")
    fmap = patch_tokens.view(hg, wg, -1)
    return F.normalize(fmap, dim=-1)

@torch.no_grad()
def feat_inter(img_pad_chw: torch.Tensor, layer_id: int, hg: int, wg: int) -> torch.Tensor:
    """Dense features taken from the output of one intermediate block.

    A temporary forward hook on ``dinov3.blocks[layer_id]`` captures that
    block's output during a single forward pass; the trailing ``hg * wg``
    tokens are then reshaped into an L2-normalized [hg, wg, C] map.

    Args:
        img_pad_chw: CHW image in [0..255], already padded to the patch size.
        layer_id: Index into ``dinov3.blocks`` of the block to tap.
        hg, wg: Patch-grid height and width of the padded image.

    Raises:
        RuntimeError: If the hook never fired, or the captured output is not
            a 3-D token tensor / has fewer tokens than patches.
    """
    captured = {}

    def hook(m, inp, out):
        captured["t"] = safe_tokens(out).detach()

    h = dinov3.blocks[layer_id].register_forward_hook(hook)
    try:
        _ = dinov3.forward_features(preprocess_for_model(img_pad_chw))
    finally:
        # Always detach the hook, even if the forward pass (or the hook
        # itself) raised; a stale hook would affect every later forward.
        h.remove()
    if "t" not in captured:
        raise RuntimeError(f"Hook failed for layer {layer_id}")
    return tokens_to_featuremap(captured["t"], hg, wg)

@torch.no_grad()
def feat_fusion_mean(img_pad_chw: torch.Tensor, layer_ids, hg: int, wg: int) -> torch.Tensor:
    """Dense features obtained by averaging several blocks' feature maps.

    Temporary forward hooks capture the outputs of ``dinov3.blocks[lid]`` for
    every ``lid`` in ``layer_ids`` during one forward pass. Each output is
    converted to an L2-normalized [hg, wg, C] map, the maps are averaged and
    the mean is L2-normalized again.

    Args:
        img_pad_chw: CHW image in [0..255], already padded to the patch size.
        layer_ids: Iterable of block indices to fuse.
        hg, wg: Patch-grid height and width of the padded image.

    Raises:
        RuntimeError: If any requested layer produced no captured output, or
            a captured output is not a usable token tensor.
    """
    feats = {}
    handles = []

    def mk_hook(lid):
        # Factory so each hook closes over its own layer id.
        def hook(m, inp, out):
            feats[lid] = safe_tokens(out).detach()
        return hook

    try:
        for lid in layer_ids:
            handles.append(dinov3.blocks[lid].register_forward_hook(mk_hook(lid)))

        _ = dinov3.forward_features(preprocess_for_model(img_pad_chw))
    finally:
        # Always remove every hook registered so far, even if registration or
        # the forward pass raised; stale hooks would affect later forwards.
        for hh in handles:
            hh.remove()

    missing = [lid for lid in layer_ids if lid not in feats]
    if missing:
        raise RuntimeError(f"Missing feats for layers: {missing}")

    fmaps = [tokens_to_featuremap(feats[lid], hg, wg) for lid in layer_ids]
    Ft = torch.stack(fmaps, dim=0).mean(dim=0)
    return F.normalize(Ft, dim=-1)

In [8]:
# ----------------------------
# Matching (chunked cosine)
# ----------------------------
@torch.no_grad()
def argmax_cosine(Ft_flat: torch.Tensor, f_src: torch.Tensor, chunk: int = 4096) -> int:
    """Index of the row of ``Ft_flat`` with the largest dot product with ``f_src``.

    Rows are scanned in slices of ``chunk`` to bound the size of the
    intermediate product. A later slice replaces the current best only when
    its maximum is strictly larger. Returns 0 when ``Ft_flat`` has no rows.
    (The dot product equals cosine similarity when both inputs are unit-norm.)
    """
    n_rows = Ft_flat.shape[0]
    top_score = None
    top_index = 0
    for start in range(0, n_rows, chunk):
        rows = Ft_flat[start:start + chunk]
        scores = (rows * f_src).sum(dim=-1)
        chunk_best, chunk_pos = scores.max(dim=0)
        score = float(chunk_best.item())
        position = start + int(chunk_pos.item())
        is_better = (top_score is None) or (score > top_score)
        if is_better:
            top_score = score
            top_index = position
    return top_index

@torch.no_grad()
def match_one_pair(sample, feat_fn, sim_chunk=4096):
    """Predict target locations for the source keypoints of one image pair.

    Both images are zero-padded to a multiple of ``PATCH`` and passed to
    ``feat_fn(img_pad, hg, wg)``, which must return an [hg, wg, C] feature
    map. For every valid keypoint pair, the feature of the source patch that
    contains the keypoint is matched against all target patches with
    ``argmax_cosine``; the prediction is the pixel center of the winning
    target patch.

    A keypoint pair is skipped when either point contains NaN, has a negative
    coordinate, or lies outside its (unpadded) image.

    Returns:
        ``(pred, gt)``: two float32 tensors of shape [K_valid, 2] in (x, y)
        order; both are empty [0, 2] tensors when no keypoint survives.
    """
    def _as_points(kps):
        # Handle [2,K] -> [K,2]
        is_transposed = kps.ndim == 2 and kps.shape[0] == 2 and kps.shape[1] != 2
        return kps.t() if is_transposed else kps

    src_img, trg_img = sample["src_img"], sample["trg_img"]
    src_kps = _as_points(sample["src_kps"])
    trg_kps = _as_points(sample["trg_kps"])

    _, Hs, Ws = src_img.shape
    _, Ht, Wt = trg_img.shape

    src_pad = pad_to_multiple(src_img, PATCH)
    trg_pad = pad_to_multiple(trg_img, PATCH)

    # Patch-grid sizes of the padded images.
    hg_s = src_pad.shape[1] // PATCH
    wg_s = src_pad.shape[2] // PATCH
    hg_t = trg_pad.shape[1] // PATCH
    wg_t = trg_pad.shape[2] // PATCH

    Fs = feat_fn(src_pad, hg_s, wg_s)
    Ft = feat_fn(trg_pad, hg_t, wg_t)
    Ft_flat = Ft.view(-1, Ft.shape[-1])

    half_patch = PATCH / 2.0
    preds, gts = [], []
    for sp, gt in zip(src_kps, trg_kps):
        if (
            torch.isnan(sp).any() or torch.isnan(gt).any()
            or (sp[0] < 0) or (sp[1] < 0) or (gt[0] < 0) or (gt[1] < 0)
        ):
            continue

        x_src = float(sp[0].item())
        y_src = float(sp[1].item())
        x_gt = float(gt[0].item())
        y_gt = float(gt[1].item())

        src_inside = (0.0 <= x_src < Ws) and (0.0 <= y_src < Hs)
        trg_inside = (0.0 <= x_gt < Wt) and (0.0 <= y_gt < Ht)
        if not (src_inside and trg_inside):
            continue

        # Source patch containing the keypoint (clamped to the grid).
        col_s = min(int(x_src) // PATCH, wg_s - 1)
        row_s = min(int(y_src) // PATCH, hg_s - 1)

        best = argmax_cosine(Ft_flat, Fs[row_s, col_s], chunk=sim_chunk)

        # Flat index -> (row, col) in the target grid -> patch-center pixel.
        row_t, col_t = divmod(best, wg_t)
        preds.append((col_t * PATCH + half_patch, row_t * PATCH + half_patch))
        gts.append((x_gt, y_gt))

    # Free GPU memory aggressively
    del Fs, Ft, Ft_flat
    torch.cuda.empty_cache()

    if not preds:
        return torch.zeros((0,2)), torch.zeros((0,2))
    pred_t = torch.tensor(preds, dtype=torch.float32)
    gt_t = torch.tensor(gts, dtype=torch.float32)
    return pred_t, gt_t

In [9]:
# ----------------------------
# Evaluation using DataLoader (global + per-category, per-kp and per-img)
# ----------------------------
def evaluate_loader(name, feat_fn, loader, max_pairs=None, sim_chunk=4096):
    """Run keypoint matching over ``loader`` and aggregate PCK statistics.

    For each pair, ``match_one_pair`` yields predicted and ground-truth target
    points. A prediction counts as correct at threshold ``T`` when its
    Euclidean distance to the ground truth is at most
    ``T * max(bbox_width, bbox_height)`` of the target bounding box.
    Pairs without valid keypoints or with a degenerate bbox contribute nothing
    to the statistics (they are still counted in ``n_pairs_run``).

    Args:
        name: Label used in progress prints and stored in the result.
        feat_fn: Feature extractor forwarded to ``match_one_pair``.
        loader: Iterable yielding one sample dict per step.
        max_pairs: Optional cap on the number of pairs processed.
        sim_chunk: Chunk size forwarded to the similarity search.

    Returns:
        Dict with ``name``, ``n_pairs_run``, ``mean_pck_per_img`` (mean of
        per-pair PCK), ``global_pck_per_kp`` (correct / total keypoints) and
        the per-category versions ``per_cat_pck_per_kp`` /
        ``per_cat_mean_pck_per_img``; PCK dicts are keyed by threshold.
    """
    thresholds = (0.05, 0.10, 0.20)

    def _zero_counts():
        return {T: 0.0 for T in thresholds}

    def _empty_lists():
        return {T: [] for T in thresholds}

    def _mean_or_zero(values):
        return float(np.mean(values)) if len(values) else 0.0

    global_correct = _zero_counts()
    global_total_kp = 0.0
    all_pck_img = _empty_lists()

    per_cat_correct, per_cat_total_kp, per_cat_pck_img = {}, {}, {}

    n_seen = 0
    for pair_no, sample in enumerate(loader, start=1):
        if max_pairs is not None and pair_no > int(max_pairs):
            break

        n_seen = pair_no
        if n_seen % 100 == 0:
            print(f"[{name}] {n_seen}")

        cat = sample["category"]
        if cat not in per_cat_correct:
            # First time this category shows up: create its accumulators.
            per_cat_correct[cat] = _zero_counts()
            per_cat_total_kp[cat] = 0.0
            per_cat_pck_img[cat] = _empty_lists()

        pred, gt = match_one_pair(sample, feat_fn, sim_chunk=sim_chunk)
        if pred.shape[0] == 0:
            continue

        # PCK normalizer: longer side of the target bounding box.
        x0, y0, x1, y1 = map(float, sample["trg_bbox"])
        norm = max(x1 - x0, y1 - y0)
        if norm <= 1e-6:
            continue

        dists = torch.linalg.norm(pred - gt, dim=1)
        N = float(dists.numel())
        if N <= 0:
            continue

        global_total_kp += N
        per_cat_total_kp[cat] += N

        cat_correct = per_cat_correct[cat]
        cat_pck_img = per_cat_pck_img[cat]
        for T in thresholds:
            hits = float((dists <= T * norm).float().sum().item())
            pair_pck = hits / N

            global_correct[T] += hits
            cat_correct[T] += hits

            all_pck_img[T].append(pair_pck)
            cat_pck_img[T].append(pair_pck)

    mean_pck_img = {T: _mean_or_zero(all_pck_img[T]) for T in thresholds}
    global_pck_kp = {T: float(global_correct[T] / max(global_total_kp, 1.0)) for T in thresholds}

    per_cat_pck_kp = {
        cat: {T: float(per_cat_correct[cat][T] / max(per_cat_total_kp[cat], 1.0)) for T in thresholds}
        for cat in per_cat_correct
    }
    per_cat_mean_pck_img = {
        cat: {T: _mean_or_zero(per_cat_pck_img[cat][T]) for T in thresholds}
        for cat in per_cat_correct
    }

    return {
        "name": name,
        "n_pairs_run": n_seen,
        "mean_pck_per_img": mean_pck_img,
        "global_pck_per_kp": global_pck_kp,
        "per_cat_pck_per_kp": per_cat_pck_kp,
        "per_cat_mean_pck_per_img": per_cat_mean_pck_img,
    }

In [10]:
# ----------------------------
# Printing helpers
# ----------------------------
def print_global_report(r):
    """Print the config name, number of pairs and both global PCK summaries.

    ``r`` is a result dict as returned by ``evaluate_loader``; PCK values are
    shown as percentages with two decimals for each threshold.
    """
    thresholds = (0.05, 0.10, 0.20)

    def _print_section(title, key):
        # One titled section: a line per threshold, read from r[key].
        print(title)
        for T in thresholds:
            pct = 100.0 * r[key][T]
            print(f"  PCK@{T:.2f}: {pct:.2f}%")

    print("\n================ TASK 1 REPORT ================")
    print("Config:", r["name"])
    print("Pairs run:", r["n_pairs_run"])

    _print_section("\nGlobal PCK (per-image mean):", "mean_pck_per_img")
    _print_section("\nGlobal PCK (per-keypoint):", "global_pck_per_kp")

def print_per_category_table(r):
    """Print a fixed-width table with per-category PCK values.

    ``r`` is a result dict as returned by ``evaluate_loader``. Categories are
    listed alphabetically; each row shows the per-keypoint PCK (KP@T) followed
    by the mean per-image PCK (IMG@T) for every threshold, as percentages.
    """
    thresholds = (0.05, 0.10, 0.20)
    name_width, cell_width = 15, 9
    cats = sorted(r["per_cat_pck_per_kp"].keys())

    print("\n================ PER-CATEGORY RESULTS ================")
    header_cells = [f" KP@{T:.2f}".rjust(cell_width) for T in thresholds]
    header_cells += [f" IMG@{T:.2f}".rjust(cell_width) for T in thresholds]
    print("Category".ljust(name_width) + "".join(header_cells))
    print("-" * (15 + 9*6))

    for cat in cats:
        kp_scores = r["per_cat_pck_per_kp"][cat]
        img_scores = r["per_cat_mean_pck_per_img"][cat]
        percentages = [100.0 * kp_scores[T] for T in thresholds]
        percentages += [100.0 * img_scores[T] for T in thresholds]
        row_cells = [f"{v:8.2f}%".rjust(cell_width) for v in percentages]
        print(cat.ljust(name_width) + "".join(row_cells))

In [None]:
# ----------------------------
# LastLayer
# ----------------------------
# Run settings for this configuration.
SIM_CHUNK = 4096   # if OOM: 2048
MAX_PAIRS = None   # full test (use 200 for debug)

# feat_last already has the (img, hg, wg) signature expected by the evaluator,
# so it is passed directly.
t0 = time.time()
r_last = evaluate_loader(
    "LastLayer",
    feat_last,
    test_loader,
    max_pairs=MAX_PAIRS,
    sim_chunk=SIM_CHUNK,
)
print_global_report(r_last)
print_per_category_table(r_last)
print("\nMinutes:", (time.time() - t0) / 60.0)

[LastLayer] 100
[LastLayer] 200
[LastLayer] 300
[LastLayer] 400
[LastLayer] 500
[LastLayer] 600
[LastLayer] 700
[LastLayer] 800
[LastLayer] 900
[LastLayer] 1000
[LastLayer] 1100
[LastLayer] 1200
[LastLayer] 1300
[LastLayer] 1400
[LastLayer] 1500
[LastLayer] 1600
[LastLayer] 1700
[LastLayer] 1800
[LastLayer] 1900
[LastLayer] 2000
[LastLayer] 2100
[LastLayer] 2200
[LastLayer] 2300
[LastLayer] 2400
[LastLayer] 2500
[LastLayer] 2600
[LastLayer] 2700
[LastLayer] 2800
[LastLayer] 2900
[LastLayer] 3000
[LastLayer] 3100
[LastLayer] 3200
[LastLayer] 3300
[LastLayer] 3400
[LastLayer] 3500
[LastLayer] 3600
[LastLayer] 3700
[LastLayer] 3800
[LastLayer] 3900
[LastLayer] 4000
[LastLayer] 4100
[LastLayer] 4200
[LastLayer] 4300
[LastLayer] 4400
[LastLayer] 4500
[LastLayer] 4600
[LastLayer] 4700
[LastLayer] 4800
[LastLayer] 4900
[LastLayer] 5000
[LastLayer] 5100
[LastLayer] 5200
[LastLayer] 5300
[LastLayer] 5400
[LastLayer] 5500
[LastLayer] 5600
[LastLayer] 5700
[LastLayer] 5800
[LastLayer] 5900
[LastL

In [None]:
!nvidia-smi


Wed Dec 31 17:53:11 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   78C    P0             34W /   70W |     514MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# ----------------------------
# InterLayer_10
# ----------------------------
# Run settings are defined here as well (same values as in the other run
# cells) so this cell does not depend on the LastLayer cell having been
# executed first, e.g. after a kernel restart.
SIM_CHUNK = 4096   # if OOM: 2048
MAX_PAIRS = None   # full test (use 200 for debug)
# Index of the transformer block whose output is used as the feature map.
LID = 10
t0 = time.time()
r_inter = evaluate_loader(
    name=f"InterLayer_{LID:02d}",
    feat_fn=lambda img,hg,wg: feat_inter(img, LID, hg, wg),
    loader=test_loader,
    max_pairs=MAX_PAIRS,
    sim_chunk=SIM_CHUNK,
)
print_global_report(r_inter)
print_per_category_table(r_inter)
print("\nMinutes:", (time.time() - t0) / 60.0)

[InterLayer_10] 100
[InterLayer_10] 200
[InterLayer_10] 300
[InterLayer_10] 400
[InterLayer_10] 500
[InterLayer_10] 600
[InterLayer_10] 700
[InterLayer_10] 800
[InterLayer_10] 900
[InterLayer_10] 1000
[InterLayer_10] 1100
[InterLayer_10] 1200
[InterLayer_10] 1300
[InterLayer_10] 1400
[InterLayer_10] 1500
[InterLayer_10] 1600
[InterLayer_10] 1700
[InterLayer_10] 1800
[InterLayer_10] 1900
[InterLayer_10] 2000
[InterLayer_10] 2100
[InterLayer_10] 2200
[InterLayer_10] 2300
[InterLayer_10] 2400
[InterLayer_10] 2500
[InterLayer_10] 2600
[InterLayer_10] 2700
[InterLayer_10] 2800
[InterLayer_10] 2900
[InterLayer_10] 3000
[InterLayer_10] 3100
[InterLayer_10] 3200
[InterLayer_10] 3300
[InterLayer_10] 3400
[InterLayer_10] 3500
[InterLayer_10] 3600
[InterLayer_10] 3700
[InterLayer_10] 3800
[InterLayer_10] 3900
[InterLayer_10] 4000
[InterLayer_10] 4100
[InterLayer_10] 4200
[InterLayer_10] 4300
[InterLayer_10] 4400
[InterLayer_10] 4500
[InterLayer_10] 4600
[InterLayer_10] 4700
[InterLayer_10] 4800
[

In [None]:
!nvidia-smi


Wed Dec 31 18:16:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   72C    P0             32W /   70W |     514MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [12]:
# ----------------------------
# FusionMean_last4 (8,9,10,11)
# ----------------------------
# Run settings for this configuration (redefined so the cell is self-contained).
SIM_CHUNK = 4096   # if OOM: 2048
MAX_PAIRS = None   # full test (use 200 for debug)
# Blocks whose feature maps are averaged by feat_fusion_mean.
LIDS_FUS = (8, 9, 10, 11)
t0 = time.time()
r_fus = evaluate_loader(
    name="FusionMean_last4",
    # Adapter: bind the layer ids so the callable has the (img, hg, wg) signature.
    feat_fn=lambda img,hg,wg: feat_fusion_mean(img, LIDS_FUS, hg, wg),
    loader=test_loader,
    max_pairs=MAX_PAIRS,
    sim_chunk=SIM_CHUNK,
)
print_global_report(r_fus)
print_per_category_table(r_fus)
# Wall-clock duration of the whole evaluation, in minutes.
print("\nMinutes:", (time.time() - t0) / 60.0)

[FusionMean_last4] 100
[FusionMean_last4] 200
[FusionMean_last4] 300
[FusionMean_last4] 400
[FusionMean_last4] 500
[FusionMean_last4] 600
[FusionMean_last4] 700
[FusionMean_last4] 800
[FusionMean_last4] 900
[FusionMean_last4] 1000
[FusionMean_last4] 1100
[FusionMean_last4] 1200
[FusionMean_last4] 1300
[FusionMean_last4] 1400
[FusionMean_last4] 1500
[FusionMean_last4] 1600
[FusionMean_last4] 1700
[FusionMean_last4] 1800
[FusionMean_last4] 1900
[FusionMean_last4] 2000
[FusionMean_last4] 2100
[FusionMean_last4] 2200
[FusionMean_last4] 2300
[FusionMean_last4] 2400
[FusionMean_last4] 2500
[FusionMean_last4] 2600
[FusionMean_last4] 2700
[FusionMean_last4] 2800
[FusionMean_last4] 2900
[FusionMean_last4] 3000
[FusionMean_last4] 3100
[FusionMean_last4] 3200
[FusionMean_last4] 3300
[FusionMean_last4] 3400
[FusionMean_last4] 3500
[FusionMean_last4] 3600
[FusionMean_last4] 3700
[FusionMean_last4] 3800
[FusionMean_last4] 3900
[FusionMean_last4] 4000
[FusionMean_last4] 4100
[FusionMean_last4] 4200
[

In [13]:
!nvidia-smi

Fri Jan  2 11:02:48 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   55C    P0             26W /   70W |     514MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                