# How to estimate Uncertainty?

We implement a **Post-Hoc Uncertainty Estimator** using a **Logistic Regression** classifier.
- **Input Features:** Number of Inliers (from Re-ranking) and L2 Distance (from Global Retrieval).
- **Training Data:** GSV-XS dataset (treated as a validation/calibration set).
- **Testing Data:** SVOX and Tokyo-XS datasets.
- **Metrics:** AUPRC, Spearman Correlation, and **AUSC (Area Under Sparsification Curve)**; the raw script output below labels the sparsification area `AURC`.

In [2]:
!python3 train_uncertainty_metrics.py\
  --train-preds-dir  ./train_logs/logs_svox_night/mixvpr_resnet50_svox_night/preds \
  --train-inliers-dir ./train_logs/logs_svox_night/mixvpr_resnet50_svox_night/preds_loftr \
  --train-z-data-path ./train_logs/logs_svox_night/mixvpr_resnet50_svox_night/z_data.torch \
  --test-preds-dir   ./test_logs/logs_svox_night/mixvpr_resnet50_svox_night/preds \
  --test-inliers-dir  ./test_logs/logs_svox_night/mixvpr_resnet50_svox_night/preds_loftr  \
  --test-z-data-path  ./test_logs/logs_svox_night/mixvpr_resnet50_svox_night/z_data.torch \
  --positive-dist-threshold 25 \
  --val-ratio 0.15 \
  --split-mode contiguous \
  --features inliers,l2


=== Train/Val split ===
N_train_all=702 | train_sub=596 | val_sub=106
split_mode=contiguous val_ratio=0.15 seed=42
features=['inliers', 'gated l2']
best T=9.000 | best C=0.1 (VAL AUPRC=12.6)
  ret = a @ b
  ret = a @ b
  ret = a @ b
Computing L2 scores: 100%|███████████████| 823/823 [00:00<00:00, 3455367.56it/s]
Computing PA scores: 100%|███████████████| 823/823 [00:00<00:00, 3214070.94it/s]
Computing SUE scores: 100%|████████████████| 823/823 [00:00<00:00, 46300.83it/s]

=== TEST AUPRC (positive = correct) ===
LogReg(inliers+gated l2): 98.8
L2-distance:            89.6
PA-score:               82.9
SUE:                    89.0
Random:                 57.7
Inliers:                98.7

=== TEST Sparsification (Risk-Coverage) ===
LogReg(inliers+gated l2) | AURC= 0.0904 | oracle= 0.0820
Inliers                | AURC= 0.0908 | oracle= 0.0820
L2-distance            | AURC= 0.1535 | oracle= 0.0820
PA-score               | AURC= 0.2054 | oracle= 0.0820
SUE                    | AURC= 0.2531 |

In [9]:
import os
import subprocess
import re
import pandas as pd
from tqdm import tqdm

# --- CONFIGURATION ---
# Root folders; run_benchmark expects <base>/logs_<dataset>/<method>_<dataset>/ below them.
TRAIN_LOGS_BASE = "train_logs"
TEST_LOGS_BASE = "test_logs"

# Every combination of the four lists below is evaluated once.
METHODS = [
    "mixvpr_resnet50",
    "netvlad_vgg16",
]
MATCHERS = [
    "superpoint-lg",
    "loftr",
]
TRAIN_DATASETS = [
    "svox_night",
    "svox_sun",
]
TEST_DATASETS = [
    "svox_sun",
    "svox_night",
    "tokyo_xs",
    "sf_xs",
]

# Script launched as a subprocess for each combination.
SCRIPT_NAME = "train_uncertainty_metrics.py"

# Signed decimal with optional fraction and exponent: "98.8", "100", "-0.57", "1e-05".
_NUMBER = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"

# Row labels used by the script for the three scores that are extracted.
_ROW_LABELS = (
    ("Ours", r"LogReg\([^)]+\)"),
    ("Inliers", r"Inliers"),
    ("L2", r"L2-distance"),
)


def _find_number(prefix, text, default=None):
    """Return the first number that follows regex *prefix* in *text*, else *default*."""
    match = re.search(prefix + _NUMBER, text)
    return float(match.group(1)) if match else default


def parse_output(output_text):
    """
    Extract the benchmark numbers (Ours, Inliers, L2) from the script's stdout.

    Args:
        output_text: Text printed by the metrics script; ``None`` is treated
            as empty text.

    Returns:
        dict with keys ``AUPRC_*``, ``AUSC_*`` and ``Spearman_*`` (each for
        ``Ours``, ``Inliers`` and ``L2``) plus ``Best_C``. A value that is not
        found is ``None``, except ``Best_C`` which falls back to ``'-'``.
    """
    text = output_text or ""
    metrics = {}

    # --- 1. AUPRC ("<label>: <value>" rows) ---
    for key, label in _ROW_LABELS:
        metrics[f"AUPRC_{key}"] = _find_number(label + r":\s+", text)

    # --- 2. AUSC ("<label> | AUSC= <value>" rows) ---
    # The captured script output in this notebook prints the column as "AURC=",
    # so both labels are accepted (assumes they denote the same metric).
    for key, label in _ROW_LABELS:
        metrics[f"AUSC_{key}"] = _find_number(label + r"\s+\|\s+AU[SR]C=\s*", text)

    # --- 3. Best C ---
    # The shared number pattern keeps exponents, so "1e-05" is not cut to 1.0.
    metrics["Best_C"] = _find_number(r"best C=", text, default="-")

    # --- 4. Spearman ("<label> | Spearman R= <value>" rows) ---
    for key, label in _ROW_LABELS:
        metrics[f"Spearman_{key}"] = _find_number(
            label + r"\s+\|\s+Spearman R=\s*", text
        )

    return metrics

def _run_paths(logs_base, method, dataset, matcher):
    """Return (preds dir, inliers dir, z-data path) for one method/dataset/matcher."""
    base = os.path.join(logs_base, f"logs_{dataset}", f"{method}_{dataset}")
    return (
        os.path.join(base, "preds"),
        os.path.join(base, f"preds_{matcher}"),
        os.path.join(base, "z_data.torch"),
    )


def _evaluate_pair(method, matcher, train_ds, test_ds, train_paths, test_paths):
    """Run the metrics script for one combination; return its table row or None on failure."""
    tr_preds, tr_inliers, tr_z = train_paths
    te_preds, te_inliers, te_z = test_paths
    cmd = [
        "python3", SCRIPT_NAME,
        "--train-preds-dir", tr_preds,
        "--train-inliers-dir", tr_inliers,
        "--train-z-data-path", tr_z,
        "--test-preds-dir", te_preds,
        "--test-inliers-dir", te_inliers,
        "--test-z-data-path", te_z,
        "--features", "inliers,l2",
        "--gate-percentiles", "10,20,30",
        "--val-ratio", "0.15",
        "--split-mode", "contiguous",
    ]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Name the full configuration: the same train->test pair occurs once
        # per method/matcher, so the pair alone does not identify the run.
        print(f"\nError in {method} {matcher} | {train_ds}->{test_ds}:\n{e.stderr}")
        return None

    metrics = parse_output(proc.stdout)
    return {
        "VPR Method": method.split('_')[0].upper(),
        "Matcher": matcher.replace("superpoint-", "").title(),
        "Train Set": train_ds.replace("_", " ").title(),
        "Test Set": test_ds.replace("_", " ").title(),

        # AUPRC
        "Ours AUPRC": metrics['AUPRC_Ours'],
        "Inl AUPRC": metrics['AUPRC_Inliers'],
        "L2 AUPRC": metrics['AUPRC_L2'],

        # AUSC
        "Ours AUSC": metrics['AUSC_Ours'],
        "Inl AUSC": metrics['AUSC_Inliers'],
        "L2 AUSC": metrics['AUSC_L2'],

        # Spearman
        "Ours Spearman": metrics['Spearman_Ours'],
        "Inl Spearman": metrics['Spearman_Inliers'],
        "L2 Spearman": metrics['Spearman_L2'],

        "Best C": metrics['Best_C'],
    }


def run_benchmark():
    """
    Run the metrics script for every method/matcher/train-set/test-set combination.

    Combinations whose train or test ``preds`` directory does not exist are
    skipped without a message. Runs whose subprocess exits with a non-zero
    status are reported on stdout and omitted from the result.

    Returns:
        pandas.DataFrame with one row per successful run.
    """
    results = []
    total_runs = len(METHODS) * len(MATCHERS) * len(TRAIN_DATASETS) * len(TEST_DATASETS)

    # As a context manager the bar is closed even if a run raises unexpectedly
    # or the cell is interrupted.
    with tqdm(total=total_runs, desc="Running Benchmark") as pbar:
        for method in METHODS:
            for matcher in MATCHERS:
                for train_ds in TRAIN_DATASETS:
                    train_paths = _run_paths(TRAIN_LOGS_BASE, method, train_ds, matcher)

                    for test_ds in TEST_DATASETS:
                        pbar.set_description(f"Eval: {method} {matcher} | {train_ds}->{test_ds}")
                        test_paths = _run_paths(TEST_LOGS_BASE, method, test_ds, matcher)

                        # Only the two "preds" directories gate a run.
                        if os.path.exists(train_paths[0]) and os.path.exists(test_paths[0]):
                            entry = _evaluate_pair(
                                method, matcher, train_ds, test_ds, train_paths, test_paths
                            )
                            if entry is not None:
                                results.append(entry)

                        pbar.update(1)

    return pd.DataFrame(results)

# --- RUN ---
# Execute the whole benchmark sweep and print the resulting table without its index column.
df_final = run_benchmark()
print("\n=== RISULTATI BENCHMARK COMPLETI ===")
print(df_final.to_string(index=False))
# Uncomment to also write the table to a CSV file:
# df_final.to_csv("benchmark_results_final.csv", index=False)

Eval: netvlad_vgg16 loftr | svox_sun->sf_xs: 100%|██████████| 32/32 [01:31<00:00,  2.87s/it]


=== RISULTATI BENCHMARK COMPLETI ===
VPR Method Matcher  Train Set   Test Set  Ours AUPRC  Inl AUPRC  L2 AUPRC  Ours AUSC  Inl AUSC  L2 AUSC  Ours Spearman  Inl Spearman  L2 Spearman  Best C
    MIXVPR      Lg Svox Night   Svox Sun        99.1       99.0      96.7     0.0211    0.0212   0.0416        -0.5692       -0.5684      -0.4454    0.01
    MIXVPR      Lg Svox Night Svox Night        98.3       98.3      89.6     0.0931    0.0931   0.1537        -0.8010       -0.8009      -0.5710    0.01
    MIXVPR      Lg Svox Night   Tokyo Xs        99.5       99.5      98.4     0.0343    0.0345   0.0433        -0.7071       -0.7065      -0.6523    0.01
    MIXVPR      Lg Svox Night      Sf Xs        98.5       98.5      94.0     0.0623    0.0623   0.0963        -0.7581       -0.7585      -0.6063    0.01
    MIXVPR      Lg   Svox Sun   Svox Sun        99.1       99.0      96.7     0.0211    0.0212   0.0416        -0.5692       -0.5684      -0.4454    0.01
    MIXVPR      Lg   Svox Sun Svox Nig


