In [1]:
from similarity_engine import SimilarityEngineOrcid
import numpy as np

In [None]:
# 0. Instantiate the engine once so that every timed query below reuses the
#    same instance instead of paying construction cost per call.
engine = SimilarityEngineOrcid()

# 1. Benchmark both query paths against that single instance.
import numpy as np
import time

queries = ["stellar mass", "supernovae"]

# Wall-clock seconds per query, one entry per element of `queries`.
orig_times = []
faiss_times = []

# NOTE(review): each method is timed once per query with no warm-up pass, so
# any one-time cost incurred on first use is included in the first sample.
for q in queries:
    # Time the original query path.
    t0 = time.perf_counter()
    orig_idx = engine.query_experts(q)
    dt_orig = time.perf_counter() - t0
    orig_times.append(dt_orig)

    # Time the FAISS query path on the same query string.
    t1 = time.perf_counter()
    faiss_idx = engine.query_experts_with_faiss(q)
    dt_faiss = time.perf_counter() - t1
    faiss_times.append(dt_faiss)

    print(f"Query '{q}': original = {dt_orig:.6f}s, FAISS = {dt_faiss:.6f}s")

# Summary: arithmetic mean of the per-query timings.
print("\nAverage timings:")
print(f"  Original avg time: {np.mean(orig_times):.6f} s")
print(f"  FAISS    avg time: {np.mean(faiss_times):.6f} s\n")

# Sample output. After the loop, `q`, `orig_idx` and `faiss_idx` still hold
# the values from the LAST query only, so the header names that query.
# The first five entries are printed as returned: wrapping them in a
# single-argument zip() would only turn each entry into a 1-tuple.
# NOTE(review): the "(idx, score)" label is inherited from the original
# script; confirm it matches what the two query methods actually return.
print(f"Sample top-5 (idx, score) for '{q}':")
print("  Original:", list(orig_idx[:5]))
print("  FAISS:   ", list(faiss_idx[:5]))
