In [None]:
from pprint import pprint

# from dotenv import load_dotenv
from IPython.display import Image, display
from langchain.document_loaders import WebBaseLoader
from langchain.schema import SystemMessage
from langchain.tools.retriever import create_retriever_tool
from langchain_core.vectorstores import InMemoryVectorStore
from fastembed import TextEmbedding
from langchain_deepseek import ChatDeepSeek
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import END, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from dataclasses import dataclass, field
from langchain_core.embeddings import Embeddings
import os

from utils import format_messages

# _ = load_dotenv()

In [None]:
# Blog posts that make up the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2025-05-01-thinking/",
    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
]

# One loader per URL, loaded in list order; `docs` keeps each loader's
# .load() result as its own entry (flattened in a later cell).
docs = [page_loader.load() for page_loader in map(WebBaseLoader, urls)]

In [None]:
# Flatten the per-URL load results into one flat list of documents.
doc_list = [document for loaded in docs for document in loaded]

# Built through from_tiktoken_encoder, so chunk_size / chunk_overlap are
# presumably counted in tokens rather than characters.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=1000,
)

# Break every document into chunks ready for indexing
doc_splits = text_splitter.split_documents(documents=doc_list)

In [None]:
@dataclass
class FastEmbed(Embeddings):
    """Adapter that exposes a fastembed ``TextEmbedding`` via the ``Embeddings`` interface."""

    # Wrapped fastembed model; a default-constructed TextEmbedding is used
    # when the caller does not supply one.
    fe: TextEmbedding = field(default_factory=TextEmbedding)

    def embed_documents(self, documents: list[str]) -> list[list[float]]:
        """Embed ``documents`` and return each yielded embedding as a plain list.

        Args:
            documents: Texts passed to the wrapped model's ``embed``.

        Returns:
            The embeddings in the order the model yields them, each converted
            with ``.tolist()``.
        """
        vectors: list[list[float]] = []
        for embedding in self.fe.embed(documents):
            vectors.append(embedding.tolist())
        return vectors

    def embed_query(self, query: str) -> list[float]:
        """Embed a single query string and return its embedding as a plain list.

        Args:
            query: The text to embed; it is wrapped in a one-element list
                before being handed to the model.

        Returns:
            The first embedding the model yields, converted with ``.tolist()``.
        """
        embedded = list(self.fe.embed([query]))
        first = embedded[0]
        return first.tolist()

In [None]:
# Wrap a fastembed model in the FastEmbed adapter; model files are cached
# under the user's home directory.
# NOTE(review): the model name ends in "-de" while the loaded pages report
# language 'en' in their metadata — confirm this model choice is intended.
embeddings = FastEmbed(
    fe=TextEmbedding(
        cache_dir=os.path.expanduser("~/.cache/fastembed"),
        model_name="jinaai/jina-embeddings-v2-base-de",
    )
)

# Index every chunk in an in-memory vector store using those embeddings.
vector_store = InMemoryVectorStore.from_documents(doc_splits, embeddings)

# Retriever view over the store (default settings)
retriever = vector_store.as_retriever()

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

model_fp16.onnx:   0%|          | 0.00/321M [00:00<?, ?B/s]

In [None]:
# Query the retriever directly; the cell's last expression is displayed.
sample_question = "What is Chain of thought?"
retriever.invoke(sample_question)

[Document(id='3562d8cb-5a4e-43de-a8a1-935d315fdb96', metadata={'source': 'https://lilianweng.github.io/posts/2025-05-01-thinking/', 'title': "Why We Think | Lil'Log", 'description': 'Special thanks to John Schulman for a lot of super valuable feedback and direct edits on this post.\nTest time compute (Graves et al. 2016, Ling, et al. 2017, Cobbe et al. 2021) and Chain-of-thought (CoT) (Wei et al. 2022, Nye et al. 2021), have led to significant improvements in model performance, while raising many research questions. This post aims to review recent developments in how to effectively use test-time compute (i.e. “thinking time”) and why it helps.', 'language': 'en'}, page_content='[20] Yuxi Xie, et al. “Self-Evaluation Guided Beam Search for Reasoning.”. NeurIPS 2023.\n[21] Yangzhen Wu, et al. “Inference Scaling Laws: An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models”. ICLR 2025.\n[22] Dongwei Jiang, et al. “RATIONALYST: Pre-training Process-Super

In [None]:
# Expose the retriever as a named tool, with a description for the LLM.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="retrieve_blog_posts",
    description="Search and return information about Lilian Weng blog posts",
)

In [None]:
# Smoke-test the retriever tool with a sample query.
tool_input = {"query": "types of reward hacking"}
result = retriever_tool.invoke(tool_input)

# Print only an excerpt of the tool output to keep the cell output short.
# NOTE(review): the slice starts at offset 10, so the preview begins
# mid-word ('king examples ...') — confirm the offset is intentional.
excerpt = result[10:1000]
pprint(excerpt)

('king examples in real life#\n'
 '\n'
 'The recommendation algorithm for social media is intended to provide useful '
 'information. However, usefulness is often measured by proxy metrics, such as '
 'the number of likes or comments, or the time or frequency of engagement on '
 'the platform. The algorithm ends up recommending content that can affect '
 'users’ emotion states such as outrageous and extreme content in order to '
 'trigger more engagement. (Harari, 2024)\n'
 'Optimizing for misspecified proxy metrics for a video sharing site may '
 'aggressively increase the watch time of users while the true goal is to '
 'optimize users’ subjective well-being. (Link)\n'
 '“The Big Short” - 2008 financial crisis caused by the housing bubble. Reward '
 'hacking of our society happened as people tried to game the financial '
 'system.\n'
 '\n'
 'Why does Reward Hacking Exist?#\n'
 'Goodhart’s Law states that “When a measure becomes a target, it ceases to be '
 'a good measure”. The intui

In [None]:
# Tools the agent is allowed to call.
tools = [retriever_tool]

# Base chat model, plus a variant with the tools bound for agent use.
llm = ChatDeepSeek(temperature=1.3, model="deepseek-chat")
llm_with_tools = llm.bind_tools(tools=tools)

In [None]:
# Define the RAG agent system prompt
rag_prompt = """You are a helpful assistant tasked with retrieving information from a series of technical blog posts by Lilian Weng.
Clarify the scope of research with the user before using your retrieval tool to gather context. Reflect on any context you fetch, and
proceed until you have sufficient context to answer the user's research request."""


def llm_call(state: MessagesState):
    """Invoke the tool-enabled LLM on the conversation with the RAG system prompt prepended.

    Args:
        state: Graph state; its ``"messages"`` entry is read as the
            conversation so far.

    Returns:
        A dict whose ``"messages"`` value is a one-element list holding the
        model's reply.
    """
    conversation = [SystemMessage(content=rag_prompt)] + state["messages"]
    reply = llm_with_tools.invoke(conversation)
    return {"messages": [reply]}


# Assemble the agent graph over the shared message state.
agent_builder = StateGraph(MessagesState)

# Two nodes: the model step and the tool-execution step.
agent_builder.add_node("llm_call", llm_call)
agent_builder.add_node("tool_call", ToolNode(tools))

# After the model step, tools_condition returns a route key; the mapping
# turns that key into the next node to run (or END to stop).
agent_builder.add_conditional_edges(
    source="llm_call",
    path=tools_condition,
    path_map={"tools": "tool_call", "__end__": END},
)

# Tool output always goes back to the model step; execution starts there too.
agent_builder.add_edge(start_key="tool_call", end_key="llm_call")
agent_builder.set_entry_point(key="llm_call")
agent = agent_builder.compile()

In [None]:
# Render the compiled graph (xray view) as a Mermaid PNG and show it inline.
graph_png = agent.get_graph(xray=True).draw_mermaid_png()
display(Image(graph_png))

In [None]:
# Ask the agent a question as a single user turn.
query = "What are the types of reward hacking discussed in the blogs?"
user_turn = {"role": "user", "content": query}
result = agent.invoke({"messages": [user_turn]})

# Hand the resulting message list to the formatting/display helper
format_messages(result["messages"])