# Halo Evaluation Notebook

In [None]:
# Environment + helpers
import time
import numpy as np
import matplotlib.pyplot as plt
from umap import UMAP
from tqdm import tqdm

from halo.config import get_config
from halo.search import PhotoSearcher
from halo.ingestion import PhotoIndexer

# Build the shared objects for this notebook. Both halo classes are constructed
# with no arguments here; `searcher` is reused by the latency and UMAP cells.
cfg = get_config()
searcher = PhotoSearcher()
# NOTE(review): `indexer` is not referenced by any other cell visible in this
# notebook — presumably meant for (re-)indexing before the latency sweep; confirm.
indexer = PhotoIndexer()
# Show the configured vector-store directory (cfg.chroma_db_dir).
print(f'Vector store directory: {cfg.chroma_db_dir}')

In [None]:
# Retrieval ablation: CLIP only vs hybrid vs hybrid+LLM
# Free-text prompts for the ablation; run_eval() iterates over this list, and
# the first entry is the one shown in the qualitative print-out.
test_queries = [
    'late summer glow',
    'cozy indoor rainy day',
    'images with both water and mountains',
    'friends laughing at sunset',
]

def run_eval(expand: bool, caption_weight: float, queries=None, k: int = 12):
    """Run a set of text queries through a freshly built searcher.

    Args:
        expand: Passed to ``PhotoSearcher.search_text`` as ``expand``.
        caption_weight: Passed to the ``PhotoSearcher`` constructor.
        queries: Iterable of query strings. When omitted, the module-level
            ``test_queries`` list is used.
        k: Number of results requested per query (default 12).

    Returns:
        A list with one dict per query, in query order, holding the query
        text, the ``expand`` and ``caption_weight`` settings used, and
        ``top_paths`` — the ``path`` attribute of each returned hit.
    """
    if queries is None:
        queries = test_queries
    # A dedicated searcher per call, so each run uses its own caption_weight.
    local_searcher = PhotoSearcher(caption_weight=caption_weight)
    return [
        {
            'query': q,
            'expand': expand,
            'caption_weight': caption_weight,
            'top_paths': [
                h.path for h in local_searcher.search_text(q, k=k, expand=expand)
            ],
        }
        for q in tqdm(queries)
    ]

# Evaluate the three configurations: no caption weighting, caption weighting
# at 0.4, and caption weighting at 0.4 with expand=True.
clip_only = run_eval(expand=False, caption_weight=0.0)
hybrid = run_eval(expand=False, caption_weight=0.4)
hybrid_expanded = run_eval(expand=True, caption_weight=0.4)

print('Example qualitative result:')
# Pair each display label with its result rows; only the first query of each
# run is printed, together with its top-ranked path (or '∅' when no hits).
labelled_runs = zip(
    ('clip', 'hybrid', 'hybrid+llm'),
    (clip_only, hybrid, hybrid_expanded),
)
for label, rows in labelled_runs:
    first_row = rows[0]
    first_query = first_row['query']
    if first_row['top_paths']:
        top_path = first_row['top_paths'][0]
    else:
        top_path = '∅'
    print(f"{label}: {first_query} -> {top_path}")

In [None]:
# Latency sweep vs collection size (configure sizes as needed)
# NOTE(review): nothing in this cell resizes or re-indexes the collection, so
# every entry in `sizes` is timed against the same index. Re-index to `size`
# photos before each measurement for the x-axis to reflect the real
# collection size.
sizes = [200, 500, 1000, 2000]
repeats = 5  # timed runs per size; the median damps one-off spikes
latency_query = 'golden hour outdoors'
latency_stats = []

# Untimed warm-up call so that any one-time start-up cost (e.g. lazy loading)
# is not charged to the first measured size.
_ = searcher.search_text(latency_query, k=12)

for size in sizes:
    samples_ms = []
    for _attempt in range(repeats):
        start = time.perf_counter()
        _ = searcher.search_text(latency_query, k=12)
        samples_ms.append((time.perf_counter() - start) * 1000)
    latency_stats.append({
        'size': size,
        # Plain float so the stats stay easy to print or serialise.
        'latency_ms': float(np.median(samples_ms)),
    })

plt.figure(figsize=(6, 4))
plt.plot(
    [row['size'] for row in latency_stats],
    [row['latency_ms'] for row in latency_stats],
    marker='o',
)
plt.xlabel('Indexed photos')
plt.ylabel('Latency (ms)')
plt.title('Search latency vs collection size')
plt.grid(True)
plt.show()

In [None]:
# Embedding visualization with UMAP (image embeddings)
collection = searcher.image_collection
records = collection.get(include=['embeddings', 'documents'])
# The store may return embeddings as a list or as a NumPy array. Truth-testing
# a multi-element array raises ValueError, so check for None and length
# explicitly instead of relying on truthiness.
embeddings = records.get('embeddings') if records else None
if embeddings is not None and len(embeddings) > 0:
    emb = np.array(embeddings)
    # Project the embeddings to 2-D using cosine distance.
    reducer = UMAP(n_neighbors=15, min_dist=0.1, metric='cosine')
    emb_2d = reducer.fit_transform(emb)
    plt.figure(figsize=(6, 6))
    plt.scatter(emb_2d[:, 0], emb_2d[:, 1], s=10, alpha=0.6)
    plt.title('CLIP embeddings (UMAP)')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.show()
else:
    print('No embeddings available – run the ingestion pipeline first.')