The LLM chatbot already has four steps:

Load + clean PDF

Split into chunks

Embed + retrieve

RAG prompt → LLM → answer

In LangGraph, each of these becomes a node that reads/writes shared state.

Define the graph state

LangGraph uses a typed state dictionary.

In [2]:
from typing import TypedDict, List
from langchain_core.documents import Document

class RAGState(TypedDict):
    """Shared state dictionary passed between the nodes of the RAG graph.

    Each node receives the current state and returns a dict holding only
    the keys it wants to update.
    """

    # The user's question; supplied by ask_chatbot, read by retrieve and generate_answer.
    question: str
    # Pages written by load_pdf, then replaced by the smaller chunks from split_docs.
    docs: List[Document]
    # Retrieved passages joined by blank lines; written by retrieve.
    context: str
    # Text of the LLM response; written by generate_answer.
    answer: str

### Nodes


1️⃣ Load + clean PDF

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate


def load_pdf(state: RAGState):
    """Load the course-notes PDF and tidy up the text of every page.

    NUL characters are removed, newlines are turned into spaces and the
    result is stripped of leading/trailing whitespace. The incoming
    ``state`` is not consulted.

    Returns:
        Partial state update ``{"docs": pages}`` with the cleaned pages.
    """
    pages = PyPDFLoader("1._Intro_to_AI_-_Course_notes.pdf").load()

    for page in pages:
        text = page.page_content
        text = text.replace("\x00", "")  # drop NUL characters from the extracted text
        text = text.replace("\n", " ")   # flatten line breaks into spaces
        page.page_content = text.strip()

    return {"docs": pages}

2️⃣ Split documents

In [5]:
def split_docs(state: RAGState):
    """Break the documents in ``state["docs"]`` into overlapping chunks.

    Uses a recursive character splitter with a chunk size of 500 and an
    overlap of 100.

    Returns:
        Partial state update ``{"docs": pieces}`` that replaces the
        documents with their chunks.
    """
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_documents(state["docs"])
    return {"docs": pieces}

3️⃣ Retrieve relevant context

In [23]:
# Embedding model served by Ollama; used by the vector store below.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Chroma collection persisted in the "intro-to-ai" directory.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="intro-to-ai",
)

# Retriever that returns the 8 best-matching entries for a query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=8))


def retrieve(state: RAGState):
    """Retrieve the passages most similar to the question and build the context.

    The chunks carried in ``state["docs"]`` are written to the Chroma
    collection before querying, but only when the collection is still
    empty. Without this step nothing in the pipeline ever populates the
    store, so on a fresh ``persist_directory`` the retriever would have
    nothing to return and the LLM would be prompted with an empty context.
    A collection that already holds entries is left untouched, so repeated
    questions do not insert duplicates.

    Args:
        state: Graph state. ``question`` is required; ``docs`` is optional
            and is only used to seed an empty collection.

    Returns:
        Partial state update ``{"context": ...}`` where the context is the
        page content of the retrieved documents joined by blank lines.
    """
    chunks = state.get("docs") or []
    # limit=1 keeps the emptiness probe cheap: one id is enough to know
    # the collection has already been populated.
    if chunks and not vectorstore.get(limit=1)["ids"]:
        vectorstore.add_documents(chunks)

    docs = retriever.invoke(state["question"])
    context = "\n\n".join(d.page_content for d in docs)
    return {"context": context}

4️⃣ Generate answer (RAG)

In [18]:
# Prompt text with two placeholders: the retrieved context and the user's question.
RAG_TEMPLATE = """
You are a helpful assistant.
Use the context below to answer the question.

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# Chat model served by Ollama; invoked by generate_answer.
llm = ChatOllama(model="llama3")

def generate_answer(state: RAGState):
    """Fill the RAG prompt from the state and ask the LLM for an answer.

    Reads ``context`` and ``question`` from ``state``.

    Returns:
        Partial state update ``{"answer": ...}`` holding the content of
        the model's response.
    """
    filled_prompt = prompt.invoke(
        {key: state[key] for key in ("context", "question")}
    )
    reply = llm.invoke(filled_prompt)
    return {"answer": reply.content}

### Build the LangGraph

In [19]:
from langgraph.graph import StateGraph, END

graph = StateGraph(RAGState)

# Register every node under the same name as its function.
for _name, _node in (
    ("load_pdf", load_pdf),
    ("split_docs", split_docs),
    ("retrieve", retrieve),
    ("generate_answer", generate_answer),
):
    graph.add_node(_name, _node)

graph.set_entry_point("load_pdf")

# Linear pipeline: each stage feeds the next, and the last one ends the run.
_pipeline = ["load_pdf", "split_docs", "retrieve", "generate_answer", END]
for _src, _dst in zip(_pipeline, _pipeline[1:]):
    graph.add_edge(_src, _dst)

app = graph.compile()

In [20]:
def ask_chatbot(question: str) -> str:
    """Run the compiled graph for one question and return the answer text.

    Args:
        question: The question placed in the initial graph state.

    Returns:
        The ``answer`` entry of the state returned by the graph.
    """
    final_state = app.invoke({"question": question})
    return final_state["answer"]

In [None]:
# Example run: send one question through the graph and show the answer.
reply = ask_chatbot("What did Alan Turing do?")
print(reply)