In [20]:
from langchain_openai import ChatOpenAI
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_classic import hub
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from typing_extensions import List, TypedDict
from langgraph.graph import START, StateGraph
import gradio as gr

<p>Set up the LLM, the embedding model, and the vector store</p>

In [2]:
# Chat model, reached through the OpenAI-style client pointed at a server on
# localhost:11434. NOTE(review): "ollama" looks like a placeholder key for a
# local server rather than a real credential; confirm before reusing elsewhere.
llm = ChatOpenAI(
    model="qwen3",
    base_url="http://localhost:11434/v1",
    api_key="ollama",
    temperature=0,
)

# Embedding model, and the in-memory vector store that uses it.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vector_store = InMemoryVectorStore(embeddings)

<p>Import document(s) and embed them in a vector store</p>

In [33]:
# Load the source PDF with PyPDFLoader.
file_path = "GFNFoundations.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

# Break the loaded documents into overlapping character chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,  # chunk size (characters)
    chunk_overlap=250,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

# Add the chunks to the vector store and show the first few returned ids.
# NOTE: re-running this cell adds the same chunks to `vector_store` again.
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids[:3])

# `prompt` is intentionally NOT assigned here. generate() reads the global
# `prompt` built with ChatPromptTemplate in the state-graph cell; pulling
# "rlm/rag-prompt" from the hub in this cell either got overwritten right away
# (fresh top-to-bottom run) or, when this cell was re-run after the graph cell,
# silently replaced the history-aware prompt. It also needed network access.

Split blog post into 134 sub-documents.
['a5b8dfbe-5890-44b8-ae3a-c5943016ccc5', 'f3414950-aeb3-4cfe-87ea-fd5a94843cbe', '892a99fb-f35d-4fc7-b685-d1c2b11b69f7']


<p>Create state graph</p>

In [32]:
class State(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""
    question: str  # the user's question; read by retrieve() and generate()
    context: List[Document]  # documents written by retrieve(), read by generate()
    chat_history: List[dict]  # earlier turns; generate() reads each dict's "role" and "content"
    answer: str  # text of the model's reply, written by generate()

def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Reads ``state["question"]`` and returns a partial state update holding
    the search results under the ``"context"`` key.
    """
    question = state["question"]
    matches = vector_store.similarity_search(question)
    return {"context": matches}

# Prompt layout: a system instruction, then the (optional) earlier turns,
# then a human message carrying the retrieved context and the question.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an assistant for question-answering tasks. Use the following context to answer the user's question accurately and concisely.",
        ),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        (
            "human",
            "Context: {context}\n\nQuestion: {question}",
        ),
    ]
)

def generate(state: State):
    """Answer the question from the retrieved context and earlier turns.

    Joins the page content of ``state["context"]`` into one string, converts
    the "user"/"assistant" entries of ``state["chat_history"]`` (if present)
    into message objects, fills the module-level ``prompt`` and sends it to
    ``llm``. Returns a partial state update with the reply text under
    ``"answer"``.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # Only these two roles are forwarded; entries with any other role are skipped.
    role_to_message = {"user": HumanMessage, "assistant": AIMessage}

    history_messages = []
    for turn in state.get("chat_history", []):
        message_cls = role_to_message.get(turn["role"])
        if message_cls is not None:
            history_messages.append(message_cls(content=turn["content"]))

    prompt_inputs = {
        "question": state["question"],
        "context": docs_content,
        "chat_history": history_messages,
    }
    response = llm.invoke(prompt.invoke(prompt_inputs))
    return {"answer": response.content}


# Build the graph: START -> retrieve -> generate, then compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()


<p>Chat with PapeRAG using Gradio</p>

In [34]:
def chat_fn(message: str, history: List[dict]):
    """Gradio chat callback: run one question through the RAG graph.

    Args:
        message: The text the user just submitted.
        history: Earlier turns as supplied by the chat interface; a shallow
            copy is handed to the graph as ``chat_history``.

    Returns:
        A dict with ``"role": "assistant"`` and the graph's answer as
        ``"content"``.
    """
    result = graph.invoke(
        {
            "question": message,
            "chat_history": history.copy(),
        }
    )
    return {"role": "assistant", "content": result["answer"]}


# Wrap chat_fn in a Gradio chat UI and start it. type="messages" goes with the
# role/content dicts that chat_fn receives as history and returns as its reply.
gr.ChatInterface(
    fn=chat_fn,
    type="messages",
    title="PapeRAG",
    description="Chat with a Retrieval-Augmented model for academic paper analysis.",
).launch()

* Running on local URL:  http://127.0.0.1:7877
* To create a public link, set `share=True` in `launch()`.


