In [11]:
from operator import itemgetter

import bs4
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.documents import Document
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    SystemMessage,
    trim_messages,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama.chat_models import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
model = ChatOllama(model="gemma3")

In [13]:
documents = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
).split_documents(
    WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
        ],
        bs_kwargs=dict(
            parse_only=bs4.SoupStrainer(
                class_=(
                    "post-title",
                    "post-content",
                    "post-header",
                )
            )
        ),
    ).load()
)
vector_store = Chroma.from_documents(documents, embedding)

In [14]:
vector_store.similarity_search(query="What is an agent system?")

[Document(id='57a02020-af67-448b-9790-41ec50ee521e', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content="Planning is essentially in order to optimize believability at the moment vs in time.\nPrompt template: {Intro of an agent X}. Here is X's plan today in broad strokes: 1)\nRelationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.\nEnvironment information is present in a tree structure.\n\n\n\n\n\nThe generative agent architecture. (Image source: Park et al. 2023)"),
 Document(id='b743a65d-2a4e-4756-86a3-9812e01353d6', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content="Planning is essentially in order to optimize believability at the moment vs in time.\nPrompt template: {Intro of an agent X}. Here is X's plan today in broad strokes: 1)\nRelationships between agents and observations of one agent by another are all taken into consideratio

In [15]:
retriever = RunnableLambda(vector_store.similarity_search).bind(k=1)
retriever.batch(["agent system", "llm"])

[[Document(id='1342e8f6-72a8-4b9d-bcf7-ed92fe56d0b7', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:')],
 [Document(id='f3cfabce-40e9-40ba-ae2a-43f8319dd7d1', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.')]]

In [16]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=1),
)
retriever.batch(["agent", "llm"])

[[Document(id='1342e8f6-72a8-4b9d-bcf7-ed92fe56d0b7', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:')],
 [Document(id='f3cfabce-40e9-40ba-ae2a-43f8319dd7d1', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.')]]

In [17]:
message = """"
Answer this question using the provided context only.

{question}

Context:

{context}
"""

prompt = ChatPromptTemplate.from_messages(["human", message])
rag_chain = dict(context=retriever, question=RunnablePassthrough()) | prompt | model

In [18]:
rag_chain.invoke("What is an agent system?")

AIMessage(content='According to the text, an agent system takes into consideration relationships between agents and observations of one agent by another, along with environment information presented in a tree structure, for planning and reacting.', additional_kwargs={}, response_metadata={'model': 'gemma3', 'created_at': '2025-07-10T21:04:01.483116Z', 'done': True, 'done_reason': 'stop', 'total_duration': 4392271125, 'load_duration': 2200680500, 'prompt_eval_count': 212, 'prompt_eval_duration': 1257978625, 'eval_count': 38, 'eval_duration': 892116833, 'model_name': 'gemma3'}, id='run--2ef28540-c116-4458-93b4-ed01335ec1d6-0', usage_metadata={'input_tokens': 212, 'output_tokens': 38, 'total_tokens': 250})

In [20]:
result = rag_chain.invoke("What is the difference between ai agents and agentic ai?")
print(result.content)

The context doesn't explicitly define the difference between “AI agents” and “agentic AI.” It describes a planning process within an “agent” which involves optimizing believability, considering relationships between agents and observations, and using a tree structure for environment information.
