In [None]:
!pip install -q langchain chromadb faiss-cpu pypdf sentence-transformers transformers ipywidgets


In [None]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline
from google.colab import files
from ipywidgets import widgets, Output
from IPython.display import display


In [None]:
# Ask the user for a file; the first key of the returned mapping is used as
# the path of the PDF to process.
uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the dialog was cancelled) would otherwise surface
    # as a bare StopIteration from next(iter(...)), which explains nothing.
    raise ValueError("No file was uploaded. Re-run this cell and choose a PDF.")
pdf_path = next(iter(uploaded))
print(f"Uploaded file: {pdf_path}")


In [None]:
# Load the uploaded PDF into LangChain documents.
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# Split the documents into chunks of at most 500 characters, with 50
# characters shared between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

if not chunks:
    # Without any text there is nothing to embed; stop here with a clear
    # message instead of failing later while the vector index is built.
    raise ValueError(
        f"No extractable text found in {pdf_path!r}. "
        "Scanned/image-only PDFs need OCR before they can be indexed."
    )

print(f"Total chunks: {len(chunks)}")


In [None]:
# Embedding model used to turn each chunk into a vector.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Index the chunk vectors in FAISS and expose the index as a retriever
# (default retriever settings).
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
retriever = vectorstore.as_retriever()

print("Vectorstore ready.")


In [None]:
# Text-to-text generation pipeline used to produce the final answer.
# Sampling is disabled and output is capped at 100 new tokens.
llm = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
    do_sample=False,
    max_new_tokens=100,
)
print("LLM loaded.")



In [None]:
def answer_query(query, top_k=3, show_context=True):
    """Answer ``query`` using chunks retrieved from the vector store as context.

    Args:
        query: The question text passed to the retriever and placed in the prompt.
        top_k: Maximum number of retrieved chunks to include in the prompt.
            The same chunks are the ones echoed when ``show_context`` is true.
            The effective number is also limited by how many documents the
            retriever returns.
        show_context: When true, print a preview of every chunk sent to the model.

    Returns:
        The generated text with surrounding whitespace stripped, or
        'Information not found.' when the retriever returns no documents.
    """
    # Select the context once so that what is displayed is exactly what the
    # model receives (previously two chunks were shown but three were used).
    selected = retriever.get_relevant_documents(query)[:top_k]

    if not selected:
        # Nothing to ground an answer on; skip the model call entirely.
        return "Information not found."

    if show_context:
        for i, doc in enumerate(selected, start=1):
            preview = doc.page_content[:500]
            # Only mark the preview as truncated when text was actually cut.
            suffix = "..." if len(doc.page_content) > 500 else ""
            print(f"\n[Context {i}]\n{preview}{suffix}\n")

    context = "\n\n".join(d.page_content for d in selected)
    prompt = f"""You are a precise assistant. Use the context to answer. If not found, say 'Information not found.'

Context:
{context}

Question:
{query}

Answer:"""
    return llm(prompt)[0]['generated_text'].strip()



In [None]:
# Output widget that captures what the click handler prints (question,
# retrieved context, answer) so it renders below the input controls.
out = Output()

def on_ask_clicked(b):
    """Click handler: print the typed question and its answer inside ``out``.

    ``b`` is whatever the click event passes in; it is not used here.
    A blank (whitespace-only) question just clears the previous output.
    """
    with out:
        # Remove whatever the previous click rendered.
        out.clear_output()

        question = text.value.strip()
        if not question:
            return

        print(f"Q: {question}")
        answer = answer_query(question)
        print("A:", answer)

# Input box for the question and the button that submits it.
text = widgets.Text(
    placeholder='Type your question here',
)
button = widgets.Button(
    description="Ask",
)

# Run the handler on every click of the button.
button.on_click(on_ask_clicked)

# Show the input, the button, and the output area together.
display(text, button, out)
