In [1]:
import json
import numpy as np
import faiss
from pathlib import Path
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the pre-processed OpenFDA indications file, read below as
# JSON Lines (one JSON object per line).
processed_path = Path("E:/MiiHA/app/data/processed/openfda_indications.jsonl")

# Parse one record per line. Blank or whitespace-only lines (for example a
# trailing newline at the end of the file) are skipped, because json.loads
# raises JSONDecodeError when given an empty document.
docs = []
with open(processed_path, "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        docs.append(json.loads(line))

# Every record must carry a "text" field; a missing key raises KeyError here.
texts = [doc["text"] for doc in docs]

print(f"✅ Loaded {len(texts)} OpenFDA chunks")


✅ Loaded 1 OpenFDA chunks


In [3]:
# Load the all-MiniLM-L6-v2 sentence-embedding model and encode every entry
# of `texts`; the progress bar is kept on so batch progress shows in the output.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(
    texts,
    show_progress_bar=True,
)


Batches: 100%|██████████| 1/1 [00:00<00:00, 25.86it/s]


In [4]:
# Convert the embeddings once into the layout FAISS works with: a C-contiguous
# float32 matrix with one row per vector. This replaces the earlier
# np.array(...).astype(...) chain, which copied the data twice.
vectors = np.ascontiguousarray(embeddings, dtype="float32")

# Fail early with a readable message instead of an IndexError on shape[1]
# when there is nothing to index (e.g. no chunks were loaded).
if vectors.ndim != 2 or vectors.shape[0] == 0:
    raise ValueError(
        f"Expected a non-empty 2-D embedding matrix, got shape {vectors.shape}"
    )

# Build a flat (exact search) L2 index sized to the embedding width and add
# every vector; row i of `vectors` becomes entry i of the index.
dimension = vectors.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(vectors)

print("✅ FAISS index created and populated")


✅ FAISS index created and populated


In [5]:
# Persist the index to disk, creating the destination directory (and any
# missing parents) first so the write cannot fail on a missing folder.
faiss_output_path = Path("E:/MiiHA/app/db/openfda_drug.index")
index_dir = faiss_output_path.parent
index_dir.mkdir(exist_ok=True, parents=True)

# The path is handed to faiss.write_index as a plain string.
faiss.write_index(index, str(faiss_output_path))


In [6]:
# Fields copied from each loaded record into the metadata file, in output
# order. A record missing any of them raises KeyError.
metadata_fields = ("id", "drug_name", "route", "purpose")
metadata = [
    {field: doc[field] for field in metadata_fields}
    for doc in docs
]

# Write the metadata list as indented JSON, creating the target directory
# (and any missing parents) beforehand.
metadata_output_path = Path("E:/MiiHA/app/data/metadata/openfda_metadata.json")
metadata_dir = metadata_output_path.parent
metadata_dir.mkdir(exist_ok=True, parents=True)
with metadata_output_path.open("w", encoding="utf-8") as out_file:
    json.dump(metadata, out_file, indent=2)

print("✅ Saved FAISS index and metadata successfully!")


✅ Saved FAISS index and metadata successfully!
