In [10]:
import os
from functools import lru_cache

from langchain.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
# ---------------- Configuration ----------------
# Directory that holds the saved FAISS index (per the original note, this
# matches the output of py.py).
FAISS_PATH = "multi_format_faiss_index"
# Hugging Face model id used to generate answers.
MODEL_ID = "google/flan-t5-base"
# Device used for the embedding model and the generation pipeline.
DEVICE = "cpu"
# Default number of retrieved chunks (default ``k`` of get_top_chunks).
CHUNKS_TO_USE = 4
# Passed as ``max_length`` to the generation pipeline.
MAX_ANSWER_LENGTH = 512
# -----------------------------------------------

# Report at import time whether the index file is present on disk.
_index_file = os.path.join(FAISS_PATH, "index.faiss")
print(f"FAISS index exists? {os.path.exists(_index_file)}")
 
def load_vectorstore():
    """Load the FAISS index stored under ``FAISS_PATH``.

    The index is opened with the ``all-MiniLM-L6-v2`` sentence-transformers
    embedding model running on ``DEVICE``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    embedding_options = {
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "model_kwargs": {"device": DEVICE},
        # presumably has to match the setting used when the index was built
        # -- verify against the indexing script
        "encode_kwargs": {"normalize_embeddings": False},
    }
    embedder = HuggingFaceEmbeddings(**embedding_options)

    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized Python objects from disk. Only point FAISS_PATH at an index
    # that was produced locally / by a trusted source.
    return FAISS.load_local(
        FAISS_PATH,
        embedder,
        allow_dangerous_deserialization=True,
    )
 
def get_top_chunks(vectorstore, query, k=CHUNKS_TO_USE):
    """Return the ``k`` best-matching chunks for ``query``.

    Args:
        vectorstore: Object exposing ``similarity_search(query, k=...)``.
        query: Text to search for.
        k: Number of results to request (defaults to ``CHUNKS_TO_USE``).

    Returns:
        A list of ``(page_content, source)`` tuples, in the order the search
        returned them. ``source`` is ``""`` when a document's metadata has no
        ``"source"`` entry.
    """
    chunks = []
    for hit in vectorstore.similarity_search(query, k=k):
        text = hit.page_content
        origin = hit.metadata.get("source", "")
        chunks.append((text, origin))
    return chunks
 
def setup_llm():
    """Build the answer-generation LLM around ``MODEL_ID``.

    Loads the tokenizer and seq2seq model, wraps them in a
    ``text2text-generation`` pipeline capped at ``MAX_ANSWER_LENGTH``, and
    returns that pipeline wrapped in ``HuggingFacePipeline``.

    Returns:
        A ``HuggingFacePipeline`` instance.
    """
    # Only the exact string "cuda" selects device index 0; every other
    # DEVICE value maps to -1.
    device_index = 0 if DEVICE == "cuda" else -1

    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

    generator = pipeline(
        "text2text-generation",
        model=seq2seq_model,
        tokenizer=tok,
        device=device_index,
        max_length=MAX_ANSWER_LENGTH,
    )
    return HuggingFacePipeline(pipeline=generator)
 
def build_prompt(context, question):
    """Assemble the prompt sent to the LLM.

    Args:
        context: Text placed under the ``Context:`` heading.
        question: Text placed after ``Question:``.

    Returns:
        The instruction line, the context, the question and a trailing
        ``Answer:`` cue, separated by blank lines.
    """
    return (
        "Answer the question based on the following context.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        "Answer:"
    )
 
@lru_cache(maxsize=1)
def _load_resources():
    """Load the vector store and the LLM once and reuse them afterwards.

    Loading is the slow part of a query, so the result is cached. A load that
    raises is not cached and is retried on the next call. Call
    ``_load_resources.cache_clear()`` to force a reload.
    """
    return load_vectorstore(), setup_llm()


def _fit_chunks_to_model(llm, chunks, question):
    """Drop trailing chunks until the prompt fits the model's input limit.

    The limit is read from ``llm.pipeline.tokenizer.model_max_length``. When
    the tokenizer or limit is not available, ``chunks`` is returned unchanged.
    At least one chunk is always kept. Assumes ``chunks`` is ordered best
    match first, so the chunks removed are the weakest matches -- TODO
    confirm against the vector store's ordering.
    """
    tokenizer = getattr(getattr(llm, "pipeline", None), "tokenizer", None)
    limit = getattr(tokenizer, "model_max_length", None)
    if tokenizer is None or not isinstance(limit, int):
        return chunks

    kept = list(chunks)
    while len(kept) > 1:
        prompt = build_prompt("\n\n".join(text for text, _ in kept), question)
        if len(tokenizer.encode(prompt)) <= limit:
            break
        kept.pop()
    return kept


def semantic_search(query):
    """Answer ``query`` from the indexed documents and print the result.

    Retrieves the best-matching chunks, trims them so the prompt fits the
    model's input limit, asks the LLM for an answer, then prints the answer
    followed by the source of every chunk that was used.

    Args:
        query: The user's question. Blank or whitespace-only queries are
            rejected with a message instead of being searched.

    Returns:
        None. All output goes to stdout.
    """
    if not query or not query.strip():
        print("Please enter a question.")
        return

    # Cached: the models and index are loaded on the first query only.
    vectorstore, llm = _load_resources()

    top_chunks = get_top_chunks(vectorstore, query)
    if not top_chunks:
        print("No relevant documents found.")
        return

    # An over-long prompt triggers the tokenizer's "sequence length is longer
    # than the specified maximum" warning, so shrink the context first.
    top_chunks = _fit_chunks_to_model(llm, top_chunks, query)
    context = "\n\n".join(text for text, _ in top_chunks)
    prompt = build_prompt(context, query)

    # invoke() replaces the deprecated llm(prompt) call style.
    answer = llm.invoke(prompt)
    if isinstance(answer, list):
        # Defensive: accept raw pipeline-style output (a list of dicts).
        answer = answer[0].get("generated_text", "") if answer else ""

    print("\n--- Semantic Answer ---\n")
    print(str(answer).strip())
    print("\n--- Sources ---")
    for i, (_, src) in enumerate(top_chunks, 1):
        print(f"{i}. {src if src else 'Unknown source'}")
    print()
 
def main():
    """Run the interactive question loop until the user quits.

    The loop ends when the user types ``exit`` or ``quit`` (any case), or
    when the prompt is interrupted (Ctrl+C) or stdin is closed (EOF). Blank
    input is ignored and the prompt is shown again.
    """
    print("Semantic Search Engine (w/ LLM Summary)")
    while True:
        try:
            query = input("\nEnter your question (or 'exit' to quit): ").strip()
        except (KeyboardInterrupt, EOFError):
            # Leave cleanly instead of surfacing a traceback at the prompt.
            print("\nExiting.")
            break
        if query.lower() in {"exit", "quit"}:
            print("Exiting.")
            break
        if not query:
            # Nothing to search for; ask again.
            continue
        semantic_search(query)


if __name__ == "__main__":
    main()

    


FAISS index exists? True
Semantic Search Engine (w/ LLM Summary)


Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 