In [None]:
# pipeline_v2_run_all_full.py
# Fully-hardened, GPU-optimized bin verification pipeline (CLIP + size-ratio + Hungarian)
# Includes: long-title shortening for CLIP, text-embedding caching, resume support, incremental saves.
# Usage: run in one notebook cell (same environment as before). Requires ultralytics, CLIP, torch, scipy, PIL, pandas.

import os, random, json, math, time, re
from pathlib import Path
from collections import Counter
from tqdm import tqdm
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageFont

import torch
from scipy.optimize import linear_sum_assignment


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_DIR = "sub_bin_images"           # raw images directory
META_DIR = "sub_metadata"           # metadata json dir
YOLO_WEIGHTS = "runs/detect/train/weights/best.pt"
OUT_DIR = Path("outputs/classified")
OUT_DIR.mkdir(parents=True, exist_ok=True)
ANNOTATED_DIR = OUT_DIR / "images"
ANNOTATED_DIR.mkdir(parents=True, exist_ok=True)
RESULT_CSV = OUT_DIR / "results_testing1.csv"
RANDOM_SEED = 42
BATCH_EMBED_SIZE = 32                # CLIP crop batch size (reduce if OOM)
ALPHA = 0.6                          # visual weight
BETA = 0.4                           # size weight
CLIP_MODEL_NAME = "ViT-B/32"         # CLIP model
FONT_FILE = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
VERBOSE = True
SAVE_EVERY = 50                      # save results every N images processed
SHORTEN_WORDS = 12                   # words to keep from long product titles for CLIP
# ----------------------------------------

random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
device = DEVICE

# ---------- Try import CLIP ----------
try:
    import clip
    CLIP_AVAILABLE = True
except Exception as e:
    CLIP_AVAILABLE = False
    raise RuntimeError("CLIP not available. Install: pip install git+https://github.com/openai/CLIP.git") from e

# ---------- Load YOLO ----------
from ultralytics import YOLO
print(f"[info] Loading YOLO weights: {YOLO_WEIGHTS} on device {device}")
yolo = YOLO(YOLO_WEIGHTS)
try:
    yolo.model.to(device)
except Exception:
    pass

# ---------- Load CLIP ----------
print(f"[info] Loading CLIP {CLIP_MODEL_NAME} on {device}")
clip_model, clip_preprocess = clip.load(CLIP_MODEL_NAME, device=device)
clip_model.eval()

# ---------- Helpers ----------
def safe_get_info(info, key):
    if not info:
        return None
    v = info.get(key, None)
    if v is None:
        return None
    if isinstance(v, dict):
        return v.get("value", None)
    return v

def load_image(path):
    return Image.open(path).convert("RGB")

def crop_box(pil_img, xyxy):
    x1, y1, x2, y2 = map(int, xyxy)
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = max(1, x2), max(1, y2)
    return pil_img.crop((x1, y1, x2, y2))

def box_metrics(xyxy):
    x1,y1,x2,y2 = xyxy
    w = max(1, int(x2) - int(x1))
    h = max(1, int(y2) - int(y1))
    return {"w": w, "h": h, "area": w*h, "max_dim": max(w,h)}

def normalize_safe(mat):
    mat = np.array(mat, dtype=float)
    mat = np.nan_to_num(mat, nan=1e6, posinf=1e6, neginf=1e6)
    mn = float(np.min(mat))
    mx = float(np.max(mat))
    if mx - mn < 1e-12:
        return np.zeros_like(mat)
    return (mat - mn) / (mx - mn + 1e-9)

def hungarian_from_cost(cost):
    if cost is None:
        return []
    cost = np.array(cost, dtype=float)
    if cost.size == 0:
        return []
    cost = np.nan_to_num(cost, nan=1e6, posinf=1e6, neginf=1e6)
    n,m = cost.shape
    N = max(n,m)
    pad_val = float(cost.max() * 10.0 if cost.size else 1e9)
    pad_cost = np.full((N,N), pad_val, dtype=float)
    pad_cost[:n, :m] = cost
    pad_cost = np.nan_to_num(pad_cost, nan=1e6, posinf=1e6, neginf=1e6)
    row_ind, col_ind = linear_sum_assignment(pad_cost)
    pairs = [(r,c) for r,c in zip(row_ind, col_ind) if r < n and c < m]
    return pairs

def cosine_dists(A, B):
    if A.size == 0 or B.size == 0:
        return np.zeros((A.shape[0] if A.size else 0, B.shape[0] if B.size else 0))
    A_n = A / (np.linalg.norm(A, axis=1, keepdims=True) + 1e-9)
    B_n = B / (np.linalg.norm(B, axis=1, keepdims=True) + 1e-9)
    sims = A_n.dot(B_n.T)
    return 1.0 - sims

def estimate_scale(pixel_sizes, real_sizes):
    pixel = np.array(pixel_sizes, dtype=float)
    real = np.array(real_sizes, dtype=float)
    mask = (real > 0)
    if mask.sum() == 0:
        return None
    real_m = real[mask]
    pixel_m = pixel[mask]
    denom = (real_m**2).sum()
    if denom == 0:
        return None
    s = (pixel_m * real_m).sum() / denom
    if not np.isfinite(s) or s <= 0:
        return None
    return float(s)

def embed_images_clips(pil_imgs, batch_size=BATCH_EMBED_SIZE):
    if len(pil_imgs) == 0:
        return np.zeros((0, 512), dtype=np.float32)
    tensors = [clip_preprocess(im) for im in pil_imgs]
    embs = []
    with torch.no_grad():
        for i in range(0, len(tensors), batch_size):
            batch = torch.stack(tensors[i:i+batch_size]).to(device)
            f = clip_model.encode_image(batch)
            f = f / (f.norm(dim=-1, keepdim=True) + 1e-9)
            embs.append(f.cpu().numpy())
    return np.vstack(embs)

def embed_texts_clips(texts, batch_size=128):
    """
    Batch text embedding with safety for long titles.
    We assume texts are already shortened to be well under CLIP's 77-token limit.
    """
    if len(texts) == 0:
        return np.zeros((0, 512), dtype=np.float32)
    embs = []
    with torch.no_grad():
        for i in range(0, len(texts), batch_size):
            chunk = texts[i:i+batch_size]
            try:
                toks = clip.tokenize(chunk).to(device)
            except RuntimeError as e:
                # As a safety net, re-shortening if clip complains
                chunk = [shorten_text_for_clip(t, max_words=SHORTEN_WORDS//2) for t in chunk]
                toks = clip.tokenize(chunk).to(device)
            f = clip_model.encode_text(toks)
            f = f / (f.norm(dim=-1, keepdim=True) + 1e-9)
            embs.append(f.cpu().numpy())
    return np.vstack(embs)

def draw_annotations(pil_img, boxes, assignment, asin_to_name, save_path):
    draw = ImageDraw.Draw(pil_img)
    try:
        font = ImageFont.truetype(FONT_FILE, 14)
    except Exception:
        font = ImageFont.load_default()
    for i, box in enumerate(boxes):
        x1,y1,x2,y2 = map(int, box)
        asin = assignment.get(i, None)
        label = asin_to_name.get(asin, asin) if asin else "UNKNOWN"
        draw.rectangle([x1,y1,x2,y2], outline="red", width=2)
        # background for readability
        tw, th = draw.textsize(label, font=font)
        draw.rectangle([(x1, y1 - th - 4), (x1 + tw + 4, y1)], fill=(0,0,0))
        draw.text((x1+2, y1 - th - 2), label, fill=(255,255,0), font=font)
    pil_img.save(save_path)

# ---------------- TEXT SHORTENING + CACHING ----------------
def shorten_text_for_clip(text, max_words=SHORTEN_WORDS):
    """Clean and shorten product title for CLIP tokenization."""
    if text is None:
        return ""
    # remove HTML entities like &reg;
    text = re.sub(r"&[a-zA-Z0-9#]+;", " ", text)
    # remove non-alphanumeric except common punctuation
    text = re.sub(r"[^a-zA-Z0-9\s\-\.,]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    words = text.split()
    short = " ".join(words[:max_words])
    # final cleanup
    short = short.strip()
    return short if short else text[:200]

text_embedding_cache = {}  # maps text -> embedding (numpy array)
def get_text_embeddings_cached(texts):
    """
    Given a list of texts (shortened), return embeddings using cache to avoid re-computation.
    Returns numpy array of shape (len(texts), dim).
    """
    to_compute = []
    idx_map = []
    for i, t in enumerate(texts):
        key = t
        if key in text_embedding_cache:
            idx_map.append((i, "cached"))
        else:
            idx_map.append((i, "compute"))
            to_compute.append(t)
    embs = np.zeros((len(texts), 512), dtype=np.float32)
    # fill cached
    for i, status in idx_map:
        if status == "cached":
            embs[i] = text_embedding_cache[texts[i]]
    # compute needed
    if to_compute:
        computed = embed_texts_clips(to_compute, batch_size=128)
        # assign back to cache in order
        ci = 0
        for i, status in idx_map:
            if status == "compute":
                emb = computed[ci]
                embs[i] = emb
                text_embedding_cache[texts[i]] = emb
                ci += 1
    return embs

# ---------------- Main: process all images (resume & incremental save) ----------------
all_imgs = sorted(list(Path(IMG_DIR).glob("*.jpg")))
if len(all_imgs) == 0:
    raise FileNotFoundError(f"No images found in {IMG_DIR}")

# Resume support
processed_images = set()
if RESULT_CSV.exists():
    try:
        existing = pd.read_csv(RESULT_CSV)
        processed_images = set(existing["image_id"].astype(str).unique().tolist())
        print(f"[info] Resuming. Found existing results with {len(processed_images)} processed images.")
    except Exception:
        processed_images = set()

results_rows = []
counter = 0
start_time = time.time()

for img_path in tqdm(all_imgs, desc="All images"):
    image_id = img_path.stem
    if image_id in processed_images:
        continue

    meta_path = Path(META_DIR) / f"{image_id}.json"
    if not meta_path.exists():
        if VERBOSE:
            print(f"[warn] metadata missing for {image_id}, skipping")
        processed_images.add(image_id)
        continue

    meta = json.load(open(meta_path, "r"))
    bin_data = meta.get("BIN_FCSKU_DATA", {})
    if not bin_data:
        if VERBOSE:
            print(f"[warn] BIN_FCSKU_DATA empty for {image_id}, skipping")
        processed_images.add(image_id)
        continue

    # Build asin info map & expanded instances
    asin_info = {}
    expanded_asins = []
    asin_to_name = {}
    for asin, info in bin_data.items():
        asin_info[asin] = info or {}
        qty = int(info.get("quantity", 1) or 1)
        for _ in range(max(1, qty)):
            expanded_asins.append(asin)
        asin_to_name[asin] = info.get("normalizedName") or info.get("name") or asin

    # YOLO inference
    try:
        res = yolo(str(img_path), device=device)
    except Exception as e:
        if VERBOSE:
            print(f"[error] YOLO inference failed for {image_id}: {e}")
        processed_images.add(image_id)
        continue

    r = res[0]
    if len(r.boxes) == 0:
        for asin, info in asin_info.items():
            results_rows.append({
                "image_id": image_id,
                "asin": asin,
                "name": asin_to_name.get(asin),
                "expected_qty": int(info.get("quantity",1) or 1),
                "detected_qty": 0,
                "match": False,
                "scale": None,
                "notes": "no_detections"
            })
        if VERBOSE:
            print(f"[info] no detections for {image_id}")
        processed_images.add(image_id)
        counter += 1
    else:
        boxes = r.boxes.xyxy.cpu().numpy().tolist()
        pil_img = load_image(img_path)
        crops = [crop_box(pil_img, b) for b in boxes]
        crop_metrics = [box_metrics(b) for b in boxes]
        pixel_size_measure = np.array([cm["max_dim"] for cm in crop_metrics], dtype=float)
        pixel_size_measure = np.nan_to_num(pixel_size_measure, nan=1.0, posinf=1e6, neginf=1.0)
        pixel_size_measure[pixel_size_measure <= 0] = 1.0

        # embed crops (batched)
        crop_embs = embed_images_clips(crops)

        # prepare and shorten texts for this image, use caching
        unique_asins = list(asin_info.keys())
        unique_texts_raw = [asin_to_name[a] for a in unique_asins]
        unique_texts = [shorten_text_for_clip(t, max_words=SHORTEN_WORDS) for t in unique_texts_raw]
        # get embeddings from cache or compute
        text_embs_unique = get_text_embeddings_cached(unique_texts)  # u x d
        if text_embs_unique.size == 0:
            # as fallback use raw ASIN strings
            text_embs_unique = get_text_embeddings_cached(unique_asins)

        # map expanded instances to embeddings
        expanded_text_embs = np.vstack([text_embs_unique[unique_asins.index(a)] for a in expanded_asins]) if expanded_asins else np.zeros((0,512))

        # visual cost matrix
        visual_cost = cosine_dists(crop_embs, expanded_text_embs)

        # compute real sizes
        real_sizes = []
        for a in expanded_asins:
            info = asin_info.get(a, {}) or {}
            L = safe_get_info(info, "length")
            W = safe_get_info(info, "width")
            H = safe_get_info(info, "height")
            vals = [v for v in (L,W,H) if v is not None and (isinstance(v,(int,float)) or str(v).replace('.','',1).isdigit())]
            try:
                vals = [float(v) for v in vals]
            except Exception:
                vals = []
            real_max = float(max(vals)) if vals else 1.0
            if not np.isfinite(real_max) or real_max <= 1e-6:
                real_max = 1.0
            real_sizes.append(real_max)
        real_sizes = np.array(real_sizes, dtype=float)

        try:
            approx_s = (np.median(pixel_size_measure) / np.median(real_sizes)) if real_sizes.size and np.median(real_sizes) > 0 else 1.0
        except Exception:
            approx_s = 1.0

        def size_penalty_matrix(s):
            expected = s * real_sizes if real_sizes.size else np.ones((0,))
            if expected.size == 0:
                return np.zeros((pixel_size_measure.shape[0], 0))
            P = np.abs(pixel_size_measure[:, None] - expected[None, :]) / (expected[None, :] + 1e-9)
            P = np.nan_to_num(P, nan=1e6, posinf=1e6, neginf=1e6)
            return P

        # assignment
        if visual_cost.size == 0:
            assignment = {}
            assigned_counts = Counter()
            s = None
            if VERBOSE:
                print(f"[warn] visual_cost empty for {image_id}")
        else:
            pairs = hungarian_from_cost(visual_cost)
            matched_pixel = [pixel_size_measure[r] for r,c in pairs] if pairs else []
            matched_real = [real_sizes[c] for r,c in pairs] if pairs else []
            s = estimate_scale(matched_pixel, matched_real) or approx_s

            for it in range(4):
                sz = size_penalty_matrix(s)
                v_norm = normalize_safe(visual_cost)
                sz_norm = normalize_safe(sz)
                cost = ALPHA * v_norm + BETA * sz_norm
                cost = np.nan_to_num(cost, nan=1e6, posinf=1e6, neginf=1e6)
                pairs = hungarian_from_cost(cost)
                if not pairs:
                    break
                matched_pixel = [pixel_size_measure[r] for r,c in pairs]
                matched_real = [real_sizes[c] for r,c in pairs]
                new_s = estimate_scale(matched_pixel, matched_real)
                if new_s is None:
                    break
                if abs(new_s - s) / (s + 1e-9) < 1e-3:
                    s = new_s
                    break
                s = new_s

            assignment = {r: expanded_asins[c] for r,c in pairs} if pairs else {}
            assigned_counts = Counter(assignment.values())

        # build rows for this image
        for asin, info in asin_info.items():
            expected_qty = int(info.get("quantity",1) or 1)
            detected_qty = assigned_counts.get(asin, 0)
            results_rows.append({
                "image_id": image_id,
                "asin": asin,
                "name": asin_to_name.get(asin),
                "expected_qty": expected_qty,
                "detected_qty": detected_qty,
                "match": detected_qty == expected_qty,
                "scale": float(s) if s is not None else None,
                "notes": None
            })

        # save annotated image
        out_path = ANNOTATED_DIR / f"{image_id}_classified.jpg"
        try:
            draw_annotations(pil_img, boxes, assignment, asin_to_name, str(out_path))
        except Exception as e:
            if VERBOSE:
                print(f"[warn] drawing annotations failed for {image_id}: {e}")

        processed_images.add(image_id)
        counter += 1

    # periodic save
    if counter % SAVE_EVERY == 0 and results_rows:
        try:
            if RESULT_CSV.exists():
                df_existing = pd.read_csv(RESULT_CSV)
                df_new = pd.DataFrame(results_rows)
                df_combined = pd.concat([df_existing, df_new], ignore_index=True)
                df_combined.to_csv(RESULT_CSV, index=False)
            else:
                pd.DataFrame(results_rows).to_csv(RESULT_CSV, index=False)
            results_rows = []
            if VERBOSE:
                print(f"[info] Flushed intermediate results to {RESULT_CSV}")
        except Exception as e:
            print(f"[error] Failed to flush intermediate results: {e}")

# final flush of any remaining rows
if results_rows:
    if RESULT_CSV.exists():
        df_existing = pd.read_csv(RESULT_CSV)
        df_new = pd.DataFrame(results_rows)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        df_combined.to_csv(RESULT_CSV, index=False)
    else:
        pd.DataFrame(results_rows).to_csv(RESULT_CSV, index=False)

elapsed = time.time() - start_time
print(f"\n[done] Processed all images in {elapsed:.1f}s. Results -> {RESULT_CSV}; annotated images -> {ANNOTATED_DIR}")


[info] Loading YOLO weights: runs/detect/train/weights/best.pt on device cuda
[info] Loading CLIP ViT-B/32 on cuda
[info] Resuming. Found existing results with 13949 processed images.


All images:   0%|                                                                                  | 0/50000 [00:00<?, ?it/s]


image 1/1 /dgxa_home/se22uari080/sub_bin_images/00003.jpg: 608x640 2 objects-b8uQs, 6.3ms
Speed: 1.6ms preprocess, 6.3ms inference, 1.0ms postprocess per image at shape (1, 3, 608, 640)
[warn] drawing annotations failed for 00003: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00004.jpg: 608x640 5 objects-b8uQs, 6.2ms
Speed: 1.8ms preprocess, 6.2ms inference, 1.1ms postprocess per image at shape (1, 3, 608, 640)


All images:   0%|                                                                        | 2/50000 [00:00<1:05:24, 12.74it/s]

[warn] drawing annotations failed for 00004: 'ImageDraw' object has no attribute 'textsize'
[warn] BIN_FCSKU_DATA empty for 00013, skipping

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00014.jpg: 384x640 3 objects-b8uQs, 7.1ms
Speed: 1.4ms preprocess, 7.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)
[warn] drawing annotations failed for 00014: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00015.jpg: 512x640 5 objects-b8uQs, 6.7ms
Speed: 1.6ms preprocess, 6.7ms inference, 1.1ms postprocess per image at shape (1, 3, 512, 640)


All images:   0%|                                                                          | 5/50000 [00:00<41:37, 20.01it/s]

[warn] drawing annotations failed for 00015: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00016.jpg: 512x640 3 objects-b8uQs, 6.2ms
Speed: 1.8ms preprocess, 6.2ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)
[warn] drawing annotations failed for 00016: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00017.jpg: 512x640 4 objects-b8uQs, 6.1ms
Speed: 1.6ms preprocess, 6.1ms inference, 1.1ms postprocess per image at shape (1, 3, 512, 640)
[warn] drawing annotations failed for 00017: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00023.jpg: 640x640 5 objects-b8uQs, 6.7ms
Speed: 2.3ms preprocess, 6.7ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)


All images:   0%|                                                                          | 8/50000 [00:00<41:03, 20.29it/s]

[warn] drawing annotations failed for 00023: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00025.jpg: 640x576 3 objects-b8uQs, 7.2ms
Speed: 2.0ms preprocess, 7.2ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 576)
[warn] drawing annotations failed for 00025: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00030.jpg: 640x512 4 objects-b8uQs, 6.5ms
Speed: 1.5ms preprocess, 6.5ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 512)
[warn] drawing annotations failed for 00030: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00031.jpg: 640x512 3 objects-b8uQs, 6.3ms
Speed: 1.7ms preprocess, 6.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 512)


All images:   0%|                                                                         | 11/50000 [00:00<42:34, 19.57it/s]

[warn] drawing annotations failed for 00031: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00038.jpg: 608x640 3 objects-b8uQs, 6.7ms
Speed: 1.6ms preprocess, 6.7ms inference, 1.1ms postprocess per image at shape (1, 3, 608, 640)
[warn] drawing annotations failed for 00038: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00039.jpg: 640x640 6 objects-b8uQs, 6.7ms
Speed: 2.0ms preprocess, 6.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)


All images:   0%|                                                                         | 14/50000 [00:00<39:05, 21.31it/s]

[warn] drawing annotations failed for 00039: 'ImageDraw' object has no attribute 'textsize'
[warn] BIN_FCSKU_DATA empty for 00040, skipping

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00046.jpg: 448x640 4 objects-b8uQs, 6.7ms
Speed: 1.4ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
[warn] drawing annotations failed for 00046: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00051.jpg: 480x640 4 objects-b8uQs, 6.7ms
Speed: 1.6ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
[warn] drawing annotations failed for 00051: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00059.jpg: 640x512 4 objects-b8uQs, 8.2ms
Speed: 1.8ms preprocess, 8.2ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 512)


All images:   0%|                                                                         | 17/50000 [00:00<43:31, 19.14it/s]

[warn] drawing annotations failed for 00059: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00066.jpg: 608x640 5 objects-b8uQs, 8.7ms
Speed: 1.7ms preprocess, 8.7ms inference, 1.5ms postprocess per image at shape (1, 3, 608, 640)
[warn] drawing annotations failed for 00066: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00073.jpg: 640x512 5 objects-b8uQs, 8.4ms
Speed: 1.9ms preprocess, 8.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 512)


All images:   0%|                                                                         | 19/50000 [00:01<47:58, 17.36it/s]

[warn] drawing annotations failed for 00073: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00084.jpg: 576x640 2 objects-b8uQs, 8.3ms
Speed: 2.1ms preprocess, 8.3ms inference, 1.2ms postprocess per image at shape (1, 3, 576, 640)
[warn] drawing annotations failed for 00084: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00091.jpg: 384x640 5 objects-b8uQs, 8.4ms
Speed: 1.7ms preprocess, 8.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)


All images:   0%|                                                                         | 21/50000 [00:01<49:59, 16.66it/s]

[warn] drawing annotations failed for 00091: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00098.jpg: 384x640 5 objects-b8uQs, 7.1ms
Speed: 1.7ms preprocess, 7.1ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
[warn] drawing annotations failed for 00098: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00102.jpg: 576x640 3 objects-b8uQs, 6.7ms
Speed: 2.1ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 640)


All images:   0%|                                                                         | 23/50000 [00:01<50:32, 16.48it/s]

[warn] drawing annotations failed for 00102: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00105.jpg: 512x640 5 objects-b8uQs, 6.7ms
Speed: 1.8ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)
[warn] drawing annotations failed for 00105: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00109.jpg: 448x640 7 objects-b8uQs, 6.7ms
Speed: 1.4ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)


All images:   0%|                                                                         | 25/50000 [00:01<51:38, 16.13it/s]

[warn] drawing annotations failed for 00109: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00114.jpg: 448x640 2 objects-b8uQs, 6.2ms
Speed: 1.6ms preprocess, 6.2ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)
[warn] drawing annotations failed for 00114: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00144.jpg: 608x640 5 objects-b8uQs, 6.7ms
Speed: 1.8ms preprocess, 6.7ms inference, 1.0ms postprocess per image at shape (1, 3, 608, 640)


All images:   0%|                                                                         | 27/50000 [00:01<49:39, 16.77it/s]

[warn] drawing annotations failed for 00144: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00151.jpg: 480x640 6 objects-b8uQs, 9.4ms
Speed: 1.6ms preprocess, 9.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)
[warn] drawing annotations failed for 00151: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00156.jpg: 480x640 5 objects-b8uQs, 6.2ms
Speed: 1.6ms preprocess, 6.2ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


All images:   0%|                                                                         | 29/50000 [00:01<50:36, 16.46it/s]

[warn] drawing annotations failed for 00156: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00158.jpg: 576x640 6 objects-b8uQs, 6.8ms
Speed: 1.7ms preprocess, 6.8ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 640)
[warn] drawing annotations failed for 00158: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00162.jpg: 576x640 3 objects-b8uQs, 6.4ms
Speed: 1.8ms preprocess, 6.4ms inference, 1.1ms postprocess per image at shape (1, 3, 576, 640)


All images:   0%|                                                                         | 31/50000 [00:01<50:27, 16.50it/s]

[warn] drawing annotations failed for 00162: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00168.jpg: 384x640 6 objects-b8uQs, 6.9ms
Speed: 1.2ms preprocess, 6.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[warn] drawing annotations failed for 00168: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00184.jpg: 448x640 2 objects-b8uQs, 6.7ms
Speed: 1.5ms preprocess, 6.7ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)


All images:   0%|                                                                         | 33/50000 [00:01<49:34, 16.80it/s]

[warn] drawing annotations failed for 00184: 'ImageDraw' object has no attribute 'textsize'

image 1/1 /dgxa_home/se22uari080/sub_bin_images/00196.jpg: 576x640 6 objects-b8uQs, 6.9ms
Speed: 1.8ms preprocess, 6.9ms inference, 1.1ms postprocess per image at shape (1, 3, 576, 640)


In [2]:
import pandas as pd

# Load your classified results
df = pd.read_csv("outputs/classified/results.csv")

# -------------------------------------------
# 1. IMAGE-LEVEL ACCURACY
# -------------------------------------------
image_acc = (
    df.groupby("image_id")["match"]
      .mean()   # average match per image
      .reset_index()
      .rename(columns={"match": "image_accuracy"})
)

print("Per-image accuracy:")
print(image_acc.head())


# -------------------------------------------
# 2. OVERALL ACCURACY (ACROSS ALL ASIN ENTRIES)
# -------------------------------------------
total_entries = len(df)
total_correct = df["match"].sum()
overall_accuracy = total_correct / total_entries if total_entries > 0 else 0

print("\n================ OVERALL DATASET SUMMARY ================")
print(f"Total Images:            {df['image_id'].nunique()}")
print(f"Total ASIN Entries:      {total_entries}")
print(f"Correct Matches:         {total_correct}")
print(f"Incorrect Matches:       {total_entries - total_correct}")
print(f"Overall Accuracy:        {overall_accuracy*100:.2f}%")
print("=========================================================\n")

# -------------------------------------------
# 3. ASIN-LEVEL ACCURACY (optional)
# -------------------------------------------
asin_acc = (
    df.groupby("asin")["match"]
      .mean()
      .reset_index()
      .rename(columns={"match": "asin_accuracy"})
)

print("Per-ASIN accuracy (first 10):")
print(asin_acc.head(10))


# -------------------------------------------
# 4. MERGE IMAGE-LEVEL ACCURACY BACK
# -------------------------------------------
df_with_acc = df.merge(image_acc, on="image_id", how="left")

# If you want: save a CSV for analysis
df_with_acc.to_csv("outputs/classified/results_with_accuracy.csv", index=False)

print("\nSaved results_with_accuracy.csv")

Per-image accuracy:
   image_id  image_accuracy
0         3        0.333333
1         4        1.000000
2        14        1.000000
3        15        0.500000
4        16        0.500000

Total Images:            49091
Total ASIN Entries:      136052
Correct Matches:         87764
Incorrect Matches:       48288
Overall Accuracy:        64.51%

Per-ASIN accuracy (first 10):
         asin  asin_accuracy
0  0001388487       0.000000
1  000217412X       1.000000
2  000255450X       1.000000
3  0002624028       1.000000
4  0007446993       1.000000
5  0008127530       1.000000
6  0020435207       1.000000
7  002081030X       1.000000
8  0021440719       0.333333
9  002166790X       1.000000

Saved results_with_accuracy.csv
