In [None]:
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


In [None]:
import os
from getpass import getpass

# Do not store a real key in the notebook source: reuse a key that is already
# present in the environment, otherwise ask for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenRouter API key: ")

# Base URL of the OpenRouter endpoint; read back when the chat model is built.
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"

In [None]:
# Load PDF: read the source document from disk with pdfplumber
PDF_PATH = "/content/Quantum Computing.pdf"

loader = PDFPlumberLoader(PDF_PATH)
documents = loader.load()

# Quick sanity check on how many documents the loader returned
print(f"Loaded {len(documents)} pages")

# Chunking: cut the loaded documents into overlapping pieces for indexing
CHUNK_SIZE = 800
CHUNK_OVERLAP = 150

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

# `docs` holds the chunks that get embedded and indexed
docs = text_splitter.split_documents(documents)

# VectorDB: embed the chunks and index them with FAISS
# Fail early with a readable message: building the index from an empty chunk
# list otherwise surfaces as an unhelpful low-level error.
if not docs:
    raise ValueError(
        "No text chunks were produced from the PDF (is it a scanned or "
        "image-only document?); cannot build the vector index."
    )

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectorstore = FAISS.from_documents(docs, embeddings)

# Similarity search returning the 2 best-matching chunks for each query
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# OpenRouter backend: chat model accessed through the OpenAI-style client.
# The endpoint and the key are taken from the process environment.
llm = ChatOpenAI(
    # connection settings
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_key=os.environ["OPENAI_API_KEY"],
    request_timeout=60,
    # generation settings
    model="meta-llama/llama-3.1-8b-instruct",
    temperature=0.7,
)
# Prompt: limits the model to the supplied context and fixes the sentence it
# must return when the document does not contain the answer.
# Placeholders filled at run time: {context} and {question}.
RAG_PROMPT_TEMPLATE = """
You are a helpful document-based assistant.

Answer the question using ONLY the provided context.
Keep the answer concise (3–4 sentences).
If the answer is not present, say:
"I don't know based on the document."

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr (metadata, quotes and escaped newlines included)
    instead of the plain chunk text.

    Args:
        retrieved_docs: iterable of documents exposing a ``page_content`` str.

    Returns:
        The chunk texts separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Pipeline: the incoming question is sent to the retriever (whose hits are
# flattened to text) and passed through unchanged as "question"; the filled
# prompt goes to the LLM and the reply is reduced to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
print("\nRAG Doc-QA Ready")
print("Type 'exit' to quit\n")

# Interactive question/answer loop; runs until the user types exit/quit.
while True:
    # An interrupt or a closed input stream ends the session without a traceback
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        print("\nSession ended.")
        break

    # Ignore blank input instead of spending an LLM call on it
    if not query:
        continue

    if query.lower() in ("exit", "quit"):
        break

    # A failed request (timeout, network, API error) is reported and the loop
    # keeps running so the session is not lost.
    try:
        answer = rag_chain.invoke(query)
    except Exception as exc:
        print(f"\nRequest failed ({type(exc).__name__}): {exc}\n")
        continue

    print("\nAssistant:\n", answer)

    # Optional: show sources
    # NOTE(review): the page value is printed exactly as stored in the chunk
    # metadata; it may be 0-based depending on the loader — confirm.
    source_docs = retriever.invoke(query)
    print("\nSources:")
    for i, doc in enumerate(source_docs, start=1):
        print(f"Source {i} | Page {doc.metadata.get('page')}")