In [5]:
# ===============================
# Notebook 04: Load texts, embeddings, and FAISS index
# ===============================

import pickle
import numpy as np
import faiss
import os

# ------------------------------
# Step 1: Load texts and embeddings
# ------------------------------
# Loads the (texts, embeddings) pair written by Notebook 03 and checks that
# the two line up row-for-row before anything downstream relies on them.
EMB_PKL = "all_pubmed_embeddings.pkl"  # From Notebook 03
FAISS_INDEX = "pubmed_faiss.index"  # Read in Step 2

if not os.path.exists(EMB_PKL):
    raise FileNotFoundError(f"{EMB_PKL} not found. Run Notebook 03 to generate embeddings.")

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load a pickle that you generated yourself (e.g. via Notebook 03).
with open(EMB_PKL, "rb") as f:
    data = pickle.load(f)

# The payload is expected to be a two-element (texts, embeddings) pair; turn a
# cryptic unpacking failure into a message that names the file.
try:
    texts, embeddings = data
except (TypeError, ValueError) as err:
    raise ValueError(
        f"{EMB_PKL} should contain a (texts, embeddings) pair, "
        f"got {type(data).__name__}."
    ) from err

# FAISS operates on C-contiguous float32 matrices. ascontiguousarray guarantees
# that layout and skips the copy when the data is already in the right form.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

if embeddings.ndim != 2:
    raise ValueError(
        f"Expected a 2-D embeddings matrix in {EMB_PKL}, got shape {embeddings.shape}."
    )

# One embedding row per text; a mismatch means the pickle is corrupt or stale.
if len(texts) != embeddings.shape[0]:
    raise ValueError(
        f"{EMB_PKL} is inconsistent: {len(texts)} texts vs "
        f"{embeddings.shape[0]} embedding rows."
    )

print(f"✅ Loaded {len(texts)} texts, embeddings shape = {embeddings.shape}")

# ------------------------------
# Step 2: Load FAISS index
# ------------------------------
# Reads the index from disk and verifies it was built from the same data that
# Step 1 loaded.
if not os.path.exists(FAISS_INDEX):
    raise FileNotFoundError(f"{FAISS_INDEX} not found. Make sure Notebook 03 built the FAISS index.")

index = faiss.read_index(FAISS_INDEX)

# A stale or mismatched index file would otherwise load silently.
# NOTE(review): presumably search hits are later mapped back to `texts` by row
# position, in which case a count mismatch returns the wrong passages — confirm.
if index.ntotal != len(texts):
    raise ValueError(
        f"{FAISS_INDEX} holds {index.ntotal} vectors but {len(texts)} texts were "
        "loaded. Rebuild the index in Notebook 03."
    )

# Query vectors must have the index's dimensionality (index.d).
if embeddings.ndim != 2 or index.d != embeddings.shape[1]:
    raise ValueError(
        f"{FAISS_INDEX} has dimension {index.d}, which does not match the "
        f"embeddings shape {embeddings.shape}. Rebuild the index in Notebook 03."
    )

print("✅ FAISS index loaded, ntotal =", index.ntotal)

# ------------------------------
# Step 3: Ready for model (Notebook 05)
# ------------------------------
# Nothing else to load here; tell the user where to continue.
print(
    "\n🎯 Notebook 04 setup complete. "
    "You can now proceed to Notebook 05 for model loading and query."
)


✅ Loaded 1500 texts, embeddings shape = (1500, 384)
✅ FAISS index loaded, ntotal = 1500

🎯 Notebook 04 setup complete. You can now proceed to Notebook 05 for model loading and query.
