In [1]:
import torch
import pandas as pd
import numpy as np
from PIL import Image
from transformers import AutoModel, AutoProcessor
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Extend the import search path with '../src' so the two local modules below can be imported.
# NOTE(review): '../src' is resolved against the kernel's working directory — confirm the
# notebook is always launched from its own folder.
import sys
sys.path.append('../src')
# presumably preprocess.py and retrieval.py live in ../src — verify
from preprocess import initialize_clip_model, generate_embedding
from retrieval import hybrid_search

In [3]:
# Choose the compute device: CUDA first, then Apple MPS, otherwise CPU.
# torch.backends.mps.is_available() is the documented availability check;
# the torch.mps.is_available() shortcut is missing from older PyTorch
# releases and raises AttributeError there.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Device is {device}")

Device is mps


In [4]:
# Read the parquet sample and cap it at SAMPLE_SIZE rows (fixed seed, so the draw is repeatable)
SAMPLE_SIZE = 20

df = pd.read_parquet('../data/merged_output_sample_100k.parquet')

total_rows = len(df)
if total_rows <= SAMPLE_SIZE:
    # Nothing to cut down: the whole frame is evaluated.
    print(f"Using all {total_rows} samples for evaluation")
else:
    df = df.sample(n=SAMPLE_SIZE, random_state=42)
    print(f"Using {SAMPLE_SIZE} samples for evaluation")

df.shape

Using 20 samples for evaluation


(20, 30)

In [5]:
# Retrieval setup: the CLIP checkpoint name handed to initialize_clip_model,
# and k, which the evaluation loop passes as top_k and reports as Recall@k.
clip_model = "openai/clip-vit-base-patch32"
initialize_clip_model(clip_model)
k=10

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [6]:
# Recall@k: every row of df is used as its own query (its Name plus its image file)
# and counts as a hit when its own Pid appears among the top-k retrieved Pids.
# NOTE(review): the query is built from the very item we expect back, so a perfect
# score is plausible if these rows are part of the searched index — confirm that
# this is the intended evaluation protocol.
n_queries = len(df)
hits = 0

# The index yielded by iterrows() is not needed; tqdm reports progress through the loop.
for _row_idx, row in tqdm(df.iterrows(), total=n_queries, desc="Evaluating"):
    # Build the query from the row's text and the image file named after its Pid
    query_text = row.get('Name', None)
    query_image_path = f"../data/images/{row['Pid']}.jpeg"  # Adjust if your image path is different

    embedding = generate_embedding(query_text=query_text, query_image_path=query_image_path)

    # Run retrieval; only the first returned value (the Pids) is used here
    retrieved_pids, _ = hybrid_search(query_text, embedding, top_k=k)

    # Hit when the ground-truth Pid is in the top-k results
    if row['Pid'] in retrieved_pids:
        hits += 1

# Guard the division so an empty frame reports NaN instead of raising ZeroDivisionError
recall_at_k = hits / n_queries if n_queries else float("nan")
print(f"Recall@{k}: {recall_at_k:.4f}")

Recall@10: 1.0000
