In [1]:
## RAG pipeline built with LangGraph
import re

In [2]:
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Dedented: an indented top-level import raises IndentationError and breaks the cell.
# NOTE(review): pynvml is not referenced in the cells shown here — confirm it is needed.
import pynvml  # type: ignore[import]


In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma

In [4]:
from typing import TypedDict,List

In [5]:
class RAGState(TypedDict):
    """State dictionary handed from node to node in the RAG graph."""

    # The user's question.
    question: str
    # Documents stored under "context" by the retrieval step.
    context: List[Document]
    # Text stored under "answer" by the generation step.
    answer: str

In [6]:
# Hand-built sample document: a short text plus provenance metadata.
doc = Document(
    metadata={
        "source": "foundations_llms.pdf",
        "date_created": "20 January 2026",
    },
    page_content="LLMs RAG Chatbot",
)
doc

Document(metadata={'source': 'foundations_llms.pdf', 'date_created': '20 January 2026'}, page_content='LLMs RAG Chatbot')

In [None]:
# Point a PDF loader at the file (path is relative to the working directory)
# and keep whatever load() returns in `document`.
loader = PyPDFLoader("foundations_llms.pdf")
document = loader.load()

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings,HuggingFaceEndpoint,ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Text splitter configuration: chunk size 1000 with an overlap of 200, both
# measured with len(); separators are listed from blank-line breaks down to
# the empty string.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    length_function=len,
    chunk_size=1000,
    chunk_overlap=200,
)
text_splitter

In [None]:
# Split the pages loaded from the PDF (`document`), not the one-line sample
# `doc`; otherwise the PDF content never reaches the vector store.
chunks = text_splitter.split_documents(document)

# Show only the chunk count; dumping every chunk would flood the output.
len(chunks)

In [None]:
# Embedding model handed to the vector store when the chunks are indexed.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [None]:
# Build a Chroma store from the chunks, embedding them with `embedding`.
vector_store = Chroma.from_documents(embedding=embedding, documents=chunks)
vector_store

In [None]:
# Similarity-search retriever over the vector store.
# The keyword must be spelled `search_kwargs`; a misspelled keyword is not
# applied, so the k=2 setting would never take effect.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)
retriever

In [None]:
# Prompt template with two placeholders, {context} and {question}.
# Spelling in the instruction text is corrected ("using", "don't") so the
# model receives clean instructions.
prompt=ChatPromptTemplate.from_template(
    """
    You are a useful assistant that answers questions with honesty and integrity.
    Answer the questions using the below context.
    If the answer is not within the context then say I don't know

    Context:
    {context}

    Question:
    {question}
    
    """
)

In [None]:
# Hugging Face endpoint for the DeepSeek-R1 repo: text-generation task,
# sampling disabled, at most 512 new tokens, provider left on "auto".
llm_endpoint = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-0528",
    provider="auto",
    task="text-generation",
    do_sample=False,
    max_new_tokens=512,
)

# Chat-style wrapper around the endpoint; this is what the graph node calls.
chat_model = ChatHuggingFace(llm=llm_endpoint)


In [None]:
def retrieve_node(state: RAGState) -> RAGState:
    """Look up documents for the question held in ``state``.

    Invokes the module-level ``retriever`` with ``state["question"]`` and
    returns a dict carrying the unchanged question plus the retrieved
    documents under ``"context"``.
    """
    retrieved = retriever.invoke(state["question"])
    return {"question": state["question"], "context": retrieved}

In [None]:
def generation_node(state: RAGState) -> RAGState:
    """Generate an answer to the question from the retrieved context.

    Pipes the module-level ``prompt`` into ``chat_model`` and a
    ``StrOutputParser``, invokes that chain with the question and the
    context text, and returns the result under ``"answer"``.

    Args:
        state: Graph state; reads ``state["question"]`` and
            ``state["context"]`` (a list of ``Document`` objects).

    Returns:
        A dict with the single key ``"answer"`` holding the parsed response.
    """
    # Hand the template the documents' text. Formatting the raw list would
    # insert its repr (metadata and Document(...) wrappers) into the prompt.
    context_text = "\n\n".join(item.page_content for item in state["context"])

    chain = prompt | chat_model | StrOutputParser()
    response = chain.invoke({
        "question": state["question"],
        "context": context_text,
    })

    return {"answer": response}

In [None]:
from langgraph.graph import StateGraph,END

In [None]:
# Workflow over RAGState: retrieve -> generate -> END.
graph = StateGraph(RAGState)

# Register the two nodes under the names used by the edges below.
graph.add_node("retrieve", retrieve_node)
graph.add_node("generate", generation_node)

# Start at "retrieve", pass its result to "generate", then finish.
graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "generate")
graph.add_edge("generate", END)

In [None]:
# Compile the graph definition into the object that is invoked below.
rag_app = graph.compile()
rag_app

In [None]:
# Run the compiled graph on a single question.
result = rag_app.invoke({"question": "What are LLMs?"})

# Prefer the first **bold** span of the answer. When the answer contains no
# bold markup, fall back to the full text instead of indexing an empty list
# (which raises IndexError).
bold_spans = re.findall(r"\*\*(.*?)\*\*", result["answer"])
answer = bold_spans[0] if bold_spans else result["answer"]

print(answer)