# Compare NTTT vs SAM3 Metrics

This notebook loads saved prediction JSON files for NTTT and SAM3, then computes COCO bbox/segm metrics on the same ground-truth annotations.

In [None]:
from pathlib import Path
import json
import pandas as pd
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [None]:
# Update these paths to your run directory.
GT_JSON = Path('../data/olive_diseases/annotations/instances_val2017.json')
NTTT_PRED_JSON = Path('../work_dirs/olive_nttt_sam3_eval/dinov2_large_10shot_seed42/nttt_predictions.json')
SAM3_PRED_JSON = Path('../work_dirs/olive_nttt_sam3_eval/dinov2_large_10shot_seed42/sam3_predictions.json')

# Runtime stat files are looked up next to each prediction file. Their
# existence is not checked in this cell.
NTTT_RUNTIME_JSON = NTTT_PRED_JSON.parent / 'nttt_runtime.json'
SAM3_RUNTIME_JSON = SAM3_PRED_JSON.parent / 'sam3_runtime.json'

# Explicit raises rather than `assert`: assert statements are removed when
# Python runs with -O, which would silently skip these checks.
if not GT_JSON.exists():
    raise FileNotFoundError(f'Missing GT file: {GT_JSON}')
if not NTTT_PRED_JSON.exists():
    raise FileNotFoundError(f'Missing NTTT predictions: {NTTT_PRED_JSON}')
if not SAM3_PRED_JSON.exists():
    raise FileNotFoundError(f'Missing SAM3 predictions: {SAM3_PRED_JSON}')

print('GT:', GT_JSON)
print('NTTT predictions:', NTTT_PRED_JSON)
print('SAM3 predictions:', SAM3_PRED_JSON)
print('NTTT runtime file:', NTTT_RUNTIME_JSON)
print('SAM3 runtime file:', SAM3_RUNTIME_JSON)

In [None]:
# Parse both prediction files into Python objects and report how many
# entries each one holds.
nttt_preds = json.loads(NTTT_PRED_JSON.read_text())
sam3_preds = json.loads(SAM3_PRED_JSON.read_text())

print(f'NTTT predictions: {len(nttt_preds)}')
print(f'SAM3 predictions: {len(sam3_preds)}')

In [None]:
def load_runtime_stat(path, key) -> float:
    """Read one numeric runtime statistic from a JSON file.

    Args:
        path: Location of the runtime JSON file (``pathlib.Path`` or ``str``).
        key: Name of the top-level entry to read.

    Returns:
        The value stored under ``key`` converted to ``float``. ``nan`` is
        returned when the file does not exist, the JSON document is not an
        object, the key is absent, or the stored value is ``null`` or cannot
        be converted to a number.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    missing = float('nan')

    # Normalise so plain string paths work as well as Path objects.
    runtime_path = Path(path)
    if not runtime_path.is_file():
        return missing

    with open(runtime_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Only a JSON object can be looked up by key.
    if not isinstance(data, dict):
        return missing

    value = data.get(key)
    if value is None:
        return missing

    try:
        return float(value)
    except (TypeError, ValueError):
        # An unparsable entry (e.g. "n/a" or a nested structure) is treated
        # like an unavailable stat instead of aborting the caller.
        return missing

def _is_compressed_rle(segmentation):
    """Return True when ``segmentation`` is an RLE dict with string/bytes counts."""
    return (
        isinstance(segmentation, dict)
        and isinstance(segmentation.get('counts'), (str, bytes))
    )


def _prepare_predictions(predictions, iou_type):
    """Return per-call copies of ``predictions`` suited to ``iou_type``.

    Anything that is not a list of dicts is returned unchanged so that
    ``loadRes`` handles it exactly as before.
    """
    if not isinstance(predictions, list):
        return predictions
    if not all(isinstance(pred, dict) for pred in predictions):
        return predictions

    # For mask evaluation, pycocotools derives each detection's area from the
    # box whenever a `bbox` field is present, which changes the
    # small/medium/large buckets. Dropping the box makes it use the mask area.
    # Only done when every mask is compressed RLE, the form the mask-area
    # path of `loadRes` can consume directly.
    strip_bbox = iou_type == 'segm' and all(
        _is_compressed_rle(pred.get('segmentation')) for pred in predictions
    )

    prepared = []
    for pred in predictions:
        entry = dict(pred)  # shallow copy: loadRes writes id/area/iscrowd into it
        if strip_bbox:
            entry.pop('bbox', None)
        prepared.append(entry)
    return prepared


def run_coco_eval(gt_json_path, predictions, iou_type='segm'):
    """Run COCO evaluation and return the twelve summary metrics.

    Args:
        gt_json_path: Path to the ground-truth annotation JSON file.
        predictions: Detection results passed to ``COCO.loadRes``. A list of
            result dicts is copied first, so the caller's dicts are not
            modified and can be reused for another ``iou_type``.
        iou_type: Evaluation type handed to ``COCOeval`` (``'bbox'`` or
            ``'segm'`` in this notebook).

    Returns:
        Dict mapping metric names (``AP``, ``AP50``, ``AP75``, ``AP_small``,
        ``AP_medium``, ``AP_large``, ``AR@1``, ``AR@10``, ``AR@100``,
        ``AR_small``, ``AR_medium``, ``AR_large``) to floats, taken in order
        from ``COCOeval.stats``.

    Raises:
        ValueError: If ``predictions`` is empty.
    """
    # Fail fast, before the ground-truth file is parsed.
    if len(predictions) == 0:
        raise ValueError('Prediction list is empty.')

    coco_gt = COCO(str(gt_json_path))
    coco_dt = coco_gt.loadRes(_prepare_predictions(predictions, iou_type))

    coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
    # Evaluate over every ground-truth image, not only those with detections.
    coco_eval.params.imgIds = sorted(coco_gt.getImgIds())
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metric_names = (
        'AP', 'AP50', 'AP75',
        'AP_small', 'AP_medium', 'AP_large',
        'AR@1', 'AR@10', 'AR@100',
        'AR_small', 'AR_medium', 'AR_large',
    )
    stats = coco_eval.stats
    return {name: float(stats[idx]) for idx, name in enumerate(metric_names)}

In [None]:
# Runtime stat file belonging to each model.
runtime_files = {
    'NTTT': NTTT_RUNTIME_JSON,
    'SAM3': SAM3_RUNTIME_JSON,
}

# One row per model: runtime stats followed by the bbox and segm metrics,
# each metric name prefixed with its evaluation type.
rows = []
for model_name, preds in zip(('NTTT', 'SAM3'), (nttt_preds, sam3_preds)):
    print(f'\n===== {model_name} | BBOX =====')
    bbox_stats = run_coco_eval(GT_JSON, preds, iou_type='bbox')

    print(f'\n===== {model_name} | SEGM =====')
    segm_stats = run_coco_eval(GT_JSON, preds, iou_type='segm')

    stats_file = runtime_files[model_name]
    rows.append({
        'model': model_name,
        'fps': load_runtime_stat(stats_file, 'fps'),
        'peak_vram_mib': load_runtime_stat(stats_file, 'peak_vram_mib'),
        **{f'bbox_{name}': value for name, value in bbox_stats.items()},
        **{f'segm_{name}': value for name, value in segm_stats.items()},
    })

# Index the table by model name; the bare name on the last line is what the
# notebook renders.
metrics_df = pd.DataFrame(rows)
metrics_df = metrics_df.set_index('model')
metrics_df

In [None]:
# Compact view: runtime stats plus the three headline AP numbers for each
# evaluation type, with the best segm AP on top.
display_cols = [
    'fps',
    'peak_vram_mib',
    'bbox_AP',
    'bbox_AP50',
    'bbox_AP75',
    'segm_AP',
    'segm_AP50',
    'segm_AP75',
]
summary_df = metrics_df.loc[:, display_cols]
summary_df.sort_values(by='segm_AP', ascending=False)