In [1]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -q -U langchain langchain-community langchain-huggingface langgraph faiss-cpu sentence-transformers huggingface-hub






Collecting langchain
  Downloading langchain-1.2.3-py3-none-any.whl.metadata (4.9 kB)
Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-1.2.0-py3-none-any.whl.metadata (2.8 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting huggingface-hub
  Downloading huggingface_hub-1.3.1-py3-none-any.whl.metadata (13 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0

In [2]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -q -U langchain-text-splitters




In [3]:
# NOTE(review): langchain-community is already installed by the first install cell;
# this cell looks redundant — confirm and remove if so.
%pip install -q -U langchain-community




In [4]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -q transformers accelerate




In [5]:
# Pinned to the version this notebook was last run with, for reproducibility.
%pip install -q pypdf==6.6.0


Collecting pypdf
  Downloading pypdf-6.6.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.6.0-py3-none-any.whl (328 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-6.6.0


In [6]:
import os
from getpass import getpass

# SECURITY: never hard-code an access token in a notebook; anything saved or
# shared with the notebook is exposed. The token that used to be embedded in
# this cell must be treated as leaked and revoked in the Hugging Face account.
#
# Reuse a token that is already present in the environment; otherwise prompt
# for it interactively without echoing the input.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")


In [7]:
%%writefile loader.py
from langchain_community.document_loaders import PyPDFLoader

def load_pdf(path):
    """Load a PDF with PyPDFLoader and tag every returned document.

    For each loaded document, ``metadata["source"]`` is set to ``path`` and
    ``metadata["page"]`` keeps the loader-provided value when present,
    falling back to 0 otherwise.

    Returns the list of loaded Document objects (not just their text).
    """
    pages = PyPDFLoader(path).load()

    for page in pages:
        meta = page.metadata
        page_number = meta.get("page", 0)
        meta["source"] = path
        meta["page"] = page_number

    return pages



Writing loader.py


In [8]:
%%writefile state.py
from typing import List
from typing_extensions import TypedDict

class ChatState(TypedDict):
    """State dictionary passed between the nodes of the chat graph."""

    # Question being answered; the rewrite node replaces it with its rewritten form.
    question: str
    # Cleaned text chunks returned by the retrieve node and joined into the prompt.
    context: List[str]
    # Citation strings ("<source> (page <n>)") returned by the retrieve node.
    # Declared here so that key is part of the graph's state schema.
    sources: List[str]
    # Text produced by the generate node.
    answer: str
    # One {"question": ..., "answer": ...} dict per turn, as appended by the
    # memory node (the entries are dicts, not plain strings).
    history: List[dict]


Writing state.py


In [9]:
%%writefile ingest.py
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by every ingest_docs() call; it is constructed once,
# when this module is imported.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

def ingest_docs(docs, chunk_size=500, chunk_overlap=50):
    """Split documents into overlapping chunks and index them in a FAISS store.

    Args:
        docs: Documents to index (for example the list returned by ``load_pdf``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the previously hard-coded value.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 50, the previously hard-coded value.

    Returns:
        The FAISS vector store built from the split documents using the
        module-level ``embeddings`` model.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embeddings)




Writing ingest.py


In [10]:
%%writefile retriever.py
def get_retriever(vector_db, k=3):
    """Wrap a vector store in a retriever.

    Args:
        vector_db: Object exposing ``as_retriever(search_kwargs=...)``.
        k: Value passed as ``search_kwargs["k"]``. Defaults to 3, the
            previously hard-coded value.

    Returns:
        Whatever ``vector_db.as_retriever`` returns.
    """
    return vector_db.as_retriever(search_kwargs={"k": k})


Writing retriever.py


In [11]:
%%writefile nodes.py
from transformers import pipeline

# Local text2text model (stable).
# Created once at import time and shared by rewrite_node and generate_node.
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-small"
)

def rewrite_node(state):
    """Ask the model to rewrite the user's question into a clearer form.

    Args:
        state: Mapping with a ``"question"`` entry.

    Returns:
        ``{"question": <rewritten question>}``. When the model output is empty
        after stripping, the original question is returned unchanged so that
        later nodes never receive an empty query.
    """
    original_question = state["question"]
    prompt = f"""
Rewrite the following question into a clear,
complete question that can be answered from documents.

Question: {original_question}
"""

    result = generator(prompt, max_new_tokens=64)
    rewritten = result[0]["generated_text"].strip()

    # An empty rewrite would discard the user's question entirely; keep the
    # original instead.
    return {"question": rewritten or original_question}



# Source paths retrieval is restricted to unless the caller overrides them.
_DEFAULT_ALLOWED_SOURCES = (
    "/content/2507.18910v1.pdf",
    "/content/IJNRD2506195.pdf",
)


def retrieve_node(state, retriever, allowed_sources=_DEFAULT_ALLOWED_SOURCES, top_k=3):
    """Fetch documents for the current question and build context and citations.

    Args:
        state: Mapping with a ``"question"`` entry.
        retriever: Object exposing ``invoke(query, **kwargs)`` that returns
            documents with ``page_content`` and ``metadata`` attributes.
        allowed_sources: Iterable of ``metadata["source"]`` values to restrict
            the search to, passed as ``filter={"source": {"$in": [...]}}``.
            Defaults to the two previously hard-coded PDF paths. Pass ``None``
            to call the retriever without any filter.
        top_k: Maximum number of documents kept. Defaults to 3, the previously
            hard-coded value.

    Returns:
        A dict with ``"context"`` (page text with newlines replaced by spaces
        and surrounding whitespace stripped) and ``"sources"``
        (``"<source> (page <page>)"`` strings), both in the same order.
    """
    search_kwargs = {}
    if allowed_sources is not None:
        search_kwargs["filter"] = {"source": {"$in": list(allowed_sources)}}

    docs = retriever.invoke(state["question"], **search_kwargs)

    # NOTE(review): the kept documents are the longest ones, so the order the
    # retriever returned them in is discarded. Confirm that chunk length is
    # the intended notion of "best" here.
    docs = sorted(docs, key=lambda d: len(d.page_content), reverse=True)[:top_k]

    return {
        "context": [d.page_content.replace("\n", " ").strip() for d in docs],
        "sources": [
            f"{d.metadata.get('source')} (page {d.metadata.get('page')})"
            for d in docs
        ],
    }


def generate_node(state):
    """Generate an answer to the question from the retrieved context.

    Reads ``state["context"]`` (joined with single spaces) and
    ``state["question"]``, fills them into the instruction prompt, and runs
    the shared ``generator`` pipeline with up to 180 new tokens.

    Returns ``{"answer": <stripped generated text>}``.
    """
    context_text = " ".join(state["context"])
    question_text = state["question"]

    prompt = f"""
You are a technical assistant.

Use ONLY the given context to answer.
Do NOT add external knowledge.

Your answer must:
1. Start with a clear definition
2. Add 1–2 lines of explanation
3. Be concise and factual

Context:
{context_text}

Question:
{question_text}
"""
    outputs = generator(prompt, max_new_tokens=180)
    answer_text = outputs[0]["generated_text"].strip()
    return {"answer": answer_text}


def memory_node(state):
    """Record the current question/answer pair in the conversation history.

    Args:
        state: Mapping with ``"question"`` and ``"answer"`` entries and an
            optional ``"history"`` list.

    Returns:
        ``{"history": <new list>}`` containing the previous entries followed
        by ``{"question": ..., "answer": ...}`` for the current turn.
    """
    # Build a new list instead of appending to the one stored in ``state``:
    # the incoming state object must not be mutated as a side effect.
    previous = state.get("history") or []
    entry = {
        "question": state["question"],
        "answer": state["answer"],
    }
    return {"history": [*previous, entry]}




Writing nodes.py


In [12]:
%%writefile graph.py
from langgraph.graph import StateGraph
from state import ChatState
from nodes import retrieve_node, generate_node, memory_node, rewrite_node

def build_graph(retriever):
    """Assemble and compile the chat graph.

    The nodes run in a fixed linear order:
    rewrite -> retrieve -> generate -> memory, with "rewrite" as the entry
    point. ``retriever`` is bound into the retrieve node.

    Returns the compiled graph.
    """
    builder = StateGraph(ChatState)

    # Register the nodes; the retrieve node needs the retriever bound in.
    builder.add_node("rewrite", rewrite_node)
    builder.add_node("retrieve", lambda state: retrieve_node(state, retriever))
    builder.add_node("generate", generate_node)
    builder.add_node("memory", memory_node)

    builder.set_entry_point("rewrite")

    # Chain each stage to the next one.
    pipeline_order = ("rewrite", "retrieve", "generate", "memory")
    for upstream, downstream in zip(pipeline_order, pipeline_order[1:]):
        builder.add_edge(upstream, downstream)

    return builder.compile()



Writing graph.py


In [13]:
%%writefile main.py
import os
import warnings
import logging

# Environment switches, set before the heavy libraries are imported:
#   CUDA_VISIBLE_DEVICES   -> disable GPU usage
#   TF_CPP_MIN_LOG_LEVEL   -> silence TensorFlow logs
#   TRANSFORMERS_VERBOSITY -> silence HuggingFace + Transformers
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "-1",
        "TF_CPP_MIN_LOG_LEVEL": "3",
        "TRANSFORMERS_VERBOSITY": "error",
    }
)

# Suppress every Python warning.
warnings.filterwarnings(action="ignore")

# Only let ERROR and above through the root logger.
root_logger = logging.getLogger()
root_logger.setLevel(logging.ERROR)


from loader import load_pdf
from ingest import ingest_docs
from retriever import get_retriever
from graph import build_graph


# Load the pages of both source PDFs into one list, in order.
docs = [
    page
    for pdf_path in ("/content/2507.18910v1.pdf", "/content/IJNRD2506195.pdf")
    for page in load_pdf(pdf_path)
]

# Index the documents, then build the retriever and the compiled chat graph.
vector_db = ingest_docs(docs)
retriever = get_retriever(vector_db)
chatbot = build_graph(retriever)

# Initial graph state: only the question is filled in.
state = dict(
    question="What is Retrieval-Augmented Generation in LLMs?",
    context=[],
    answer="",
    history=[],
)

result = chatbot.invoke(state)
print("Answer:", result["answer"])
print("History:", result["history"])


Writing main.py


In [14]:
!python main.py


E0000 00:00:1768192161.931536    4776 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768192161.935739    4776 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768192161.950322    4776 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768192161.950374    4776 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768192161.950378    4776 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768192161.950382    4776 computation_placer.cc:177] computation placer already registered. Please check linka