In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_classic import hub
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from typing_extensions import List, TypedDict
from langgraph.graph import START, StateGraph
import gradio as gr
import re

  from .autonotebook import tqdm as notebook_tqdm


<p>Configure the chat model, the embedding model, and the vector store</p>

In [2]:
# Chat model reached through an OpenAI-compatible endpoint on localhost.
# NOTE(review): the api_key value looks like a placeholder rather than a real
# credential — confirm before pointing base_url at a hosted service.
llm = ChatOpenAI(
    model="qwen3:4b",
    base_url="http://localhost:11434/v1",
    api_key="ollama",
    temperature=0,
)

# Embedding model, and the in-memory store that indexes documents with it.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vector_store = InMemoryVectorStore(embeddings)

<p>Load the document(s), split them into chunks, and embed the chunks in the vector store</p>

In [4]:
# Load the PDF into a list of documents.
file_path = "Bengio2021.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

# Split the loaded documents into overlapping character chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,  # chunk size (characters)
    chunk_overlap=250,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

# Start from an empty store so that re-running this cell does not index the
# same chunks a second time (retrieval would then return duplicates).
# `retrieve` looks up `vector_store` at call time, so rebinding it here is safe.
vector_store = InMemoryVectorStore(embeddings)
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids[:3])

# No prompt is pulled from the hub here: the `prompt` used by `generate` is
# defined next to the graph nodes, and a hub prompt bound to the same name
# would be overwritten before it was ever used.

Split blog post into 51 sub-documents.
['071a6535-872c-4cd1-8257-5879d3ef0c90', '04f2def7-a5d8-49b9-bdaf-979a08fe3b88', '250402d9-19f8-4f33-b103-e00190fda171']


<p>Define the graph state, prompts, and nodes, then build the state graph</p>

In [8]:
class State(TypedDict):
    """State shared by the nodes of the retrieval / generation / summary graph."""

    # The user's current question; read by retrieve, generate and
    # summarize_conversation.
    question: str
    # Documents returned by retrieve; generate joins their page_content.
    context: List[Document]
    # Model response produced by generate; read by summarize_conversation.
    answer: str
    # Set to an empty list by chat_fn; no node in this notebook reads it.
    chat_history: List[dict]
    # Running conversation summary: read by generate and summarize_conversation,
    # rewritten by summarize_conversation.
    conversation_summary: str  

# Prompt that folds the latest question/answer exchange into a running summary.
# Template variables: {current_summary}, {question}, {answer}.
_SUMMARY_SYSTEM_TEXT = """You are an expert at summarizing conversations. 
Create a concise summary of the conversation that captures the key topics discussed, 
important questions asked, and main points from the answers.
Keep technical terms and specific references (like theorem names, definitions, etc.)."""

_SUMMARY_HUMAN_TEXT = """Current Summary (if any):
{current_summary}

New Exchange:
User: {question}
Assistant: {answer}

Updated Summary:"""

summary_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SUMMARY_SYSTEM_TEXT),
        ("human", _SUMMARY_HUMAN_TEXT),
    ]
)

def summarize_conversation(state: State):
    """Fold the latest question/answer pair into the conversation summary.

    Reads ``question`` and ``answer`` from the state, plus
    ``conversation_summary`` when present. A missing or empty summary is sent
    to the model as "No previous conversation.".

    Returns:
        A state update ``{"conversation_summary": <content of the model response>}``.
    """
    previous_summary = state.get("conversation_summary", "") or "No previous conversation."

    prompt_value = summary_prompt.invoke(
        {
            "current_summary": previous_summary,
            "question": state["question"],
            "answer": state["answer"],
        }
    )

    return {"conversation_summary": llm.invoke(prompt_value).content}


def retrieve(state: State):
    """Select up to five documents from ``vector_store`` for the question.

    When the question names a numbered statement ("definition 2",
    "Theorem 1", ...), documents that mention that exact statement are placed
    first, followed by the top semantic matches. Otherwise the five most
    similar documents are returned.

    Returns:
        A state update ``{"context": [documents]}``.
    """
    question = state["question"]

    reference = re.search(
        r'\b(definition|theorem|proposition|lemma|corollary)\s+(\d+)\b',
        question,
        re.IGNORECASE,
    )
    if reference is None:
        return {"context": vector_store.similarity_search(question, k=5)}

    # One search serves both purposes: the candidate pool for the keyword scan
    # and, through its first three entries, the purely semantic matches.
    # NOTE(review): relies on similarity_search returning results ordered by
    # decreasing similarity, so the top 3 are a prefix of the top 50.
    all_candidates = vector_store.similarity_search(question, k=50)
    semantic_docs = all_candidates[:3]

    # Match the statement as a whole token sequence: "\s+" tolerates the line
    # breaks / repeated spaces found in extracted PDF text, and the trailing
    # "\b" stops "proposition 3" from matching "proposition 30".
    kind, number = reference.groups()
    mention = re.compile(
        rf'\b{re.escape(kind)}\s+{re.escape(number)}\b',
        re.IGNORECASE,
    )

    keyword_docs = []
    for doc in all_candidates:
        if mention.search(doc.page_content):
            keyword_docs.append(doc)
            if len(keyword_docs) >= 5:
                break

    combined_docs = keyword_docs + [d for d in semantic_docs if d not in keyword_docs]
    return {"context": combined_docs[:5]}


def generate(state: State):
    """Answer the question from the retrieved context and conversation summary.

    Joins the ``page_content`` of every document in ``state["context"]`` with
    blank lines, fills ``prompt`` with it together with the question and the
    conversation summary, and sends the result to ``llm``.

    Returns:
        A state update ``{"answer": <content of the model response>}``.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # The key can be present but empty (first turn of a chat), in which case a
    # plain .get() default would not apply and the prompt would show a blank
    # summary. Fall back on any falsy value, as summarize_conversation does.
    conversation_context = state.get("conversation_summary") or "No previous conversation."

    messages = prompt.invoke({
        "question": state["question"],
        "context": docs_content,
        "summary": conversation_context
    })
    response = llm.invoke(messages)
    return {"answer": response.content}

# Prompt used by `generate`. Template variables: {context}, {summary}, {question}.
_ANSWER_SYSTEM_TEXT = """You are an assistant for question-answering tasks.
Use the retrieved context and conversation summary to provide accurate answers."""

_ANSWER_HUMAN_TEXT = """Retrieved Context:
{context}

Conversation Summary:
{summary}

Current Question: {question}

Answer:"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _ANSWER_SYSTEM_TEXT),
        ("human", _ANSWER_HUMAN_TEXT),
    ]
)

# Chain the three nodes in order (retrieve, generate, summarize_conversation)
# and enter the chain at "retrieve".
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate, summarize_conversation])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

<p>Chat with PapeRAG using Gradio</p>

In [None]:
def chat_fn(message: str, history: List[dict]):
    """Chat callback: run one turn of the graph and return the assistant message.

    Args:
        message: The user's new message, used as the graph's ``question``.
        history: Earlier messages of the current chat. Only its emptiness is
            used, to detect the start of a new conversation.

    Returns:
        ``{"role": "assistant", "content": <answer>}``.

    The running summary is kept on the function object between calls.
    NOTE(review): that storage is shared by every caller of this function, so
    concurrent chats would overwrite each other's summary — confirm this is
    only used by one chat at a time.
    """
    # An empty history marks the first message of a conversation (fresh page
    # or cleared chat); drop the summary left over from the previous one so it
    # does not leak into the new chat.
    if not history:
        chat_fn.summary = ""
    previous_summary = getattr(chat_fn, "summary", "")

    chat_state = {
        "question": message,
        "conversation_summary": previous_summary,
        "chat_history": []
    }

    result = graph.invoke(chat_state)
    answer = result["answer"]

    # Remember the updated summary for the next turn of this conversation.
    chat_fn.summary = result.get("conversation_summary", "")

    return {"role": "assistant", "content": answer}


# Build the chat UI around chat_fn, then start the Gradio server.
demo = gr.ChatInterface(
    fn=chat_fn,
    type="messages",
    title="PapeRAG",
    description="Chat with a Retrieval-Augmented model for academic paper analysis.",
)
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




Answer: Based on the retrieved context, Proposition 3 states:

When trajectories τ are sampled from an exploratory policy P with the same support as the optimal π (defined in Eq. 5) for a consistent flow F*∈F*, and assuming we have a sufficiently rich family of predictors (so that ∃θ: Fθ = F*), then:

1. The global optimum θ* of the expected training loss (EP(τ)[Lθ(τ)]) will produce the correct flow model: Fθ* = F*

2. The loss Lθ*(τ) will be 0 for all trajectories τ sampled from P(θ)

3. If the policy πθ* satisfies the condition πθ*(a|s) = Fθ*(s,a)/∑a'∈A(s) Fθ*(s,a'), then the resulting policy distribution will be πθ*(x) = R(x)/Z

This proposition demonstrates that a global optimum of the expected loss provides the correct flow model, and importantly, it shows that this method is an off-policy offline method - meaning we can use any broad-support policy to sample training trajectories and still obtain the correct flows and generative model. The text notes this approach is analogous to