In [1]:
%%capture --no-stderr
#%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-chroma bs4
#!pip install langchain-cohere
#!pip install -U langgraph

# Setup & Keys

In [2]:
import json

# Load the secret keys from the JSON file
def load_secret_keys(file_path):
    with open(file_path, 'r') as file:
        secret_keys = json.load(file)
    return secret_keys
keys =load_secret_keys('secrets.json')

In [3]:
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = keys['Groq']
os.environ["COHERE_API_KEY"] = keys['cohere']
os.environ["GROQ_API_KEY"] = keys['Groq']

# Language Model

In [4]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192")

# Chains

In [5]:
import bs4
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
#from langchain_openai import OpenAIEmbeddings
from langchain_cohere import CohereEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [8]:
# 1. Load, chunk and index the contents of the blog to create a retriever.
url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
#url = 'https://jalammar.github.io/illustrated-transformer/'
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=CohereEmbeddings())
retriever = vectorstore.as_retriever()

In [15]:
# 2. Incorporate the retriever into a question-answering chain.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use ten sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [10]:
response = rag_chain.invoke({"input": "What is Task Decomposition?"})
response["answer"]

Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Forbidden"}\')')


'Task decomposition is the process of breaking down a complex task into smaller, simpler, and more manageable steps. This technique is used to enhance model performance on complex tasks, allowing the model to "think step by step" and decompose hard tasks into smaller and simpler steps.'

Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Forbidden"}\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Forbidden"}\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Forbidden"}\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Forbidden"}\')')
Failed to batch 

In [16]:
response = rag_chain.invoke({"input": "What is Reflexion?"})
response["answer"]

'Reflexion is a framework that equips agents with dynamic memory and self-reflection capabilities to improve reasoning skills. It is designed to help agents learn from their mistakes and adapt to new situations by allowing them to reflect on their past actions and decisions.'

## Adding chat history

In [17]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [20]:
from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

print(ai_msg_1["answer"])

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)
second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

Task decomposition is the process of breaking down a complex task into smaller and simpler steps. This is done to make the task more manageable and to enable an agent to plan and execute the task more effectively.
I don't have enough context to provide a specific answer. Could you please provide more information or clarify what "it" refers to? I'll do my best to assist you with common ways of doing whatever "it" is.


In [21]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [22]:
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Task decomposition is the process of breaking down a complex task into smaller and simpler steps. This technique is used to decompose hard tasks into manageable tasks, which can be solved more easily.'

In [23]:
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'I\'m not sure what "it" refers to. Could you please clarify what you\'re asking about? If you provide more context or information, I\'ll do my best to help you.'

In [24]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller and simpler steps. This technique is used to decompose hard tasks into manageable tasks, which can be solved more easily.

User: What are common ways of doing it?

AI: I'm not sure what "it" refers to. Could you please clarify what you're asking about? If you provide more context or information, I'll do my best to help you.



## Tying it together

![Conversational Retrieval Chain](https://python.langchain.com/v0.2/assets/images/conversational_retrieval_chain-5c7a96abe29e582bc575a0a0d63f86b0.png)

In [25]:
import bs4
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
#from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_cohere import CohereEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

#llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
llm = ChatGroq(model="llama3-8b-8192",temperature=0)


### Construct retriever ###
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=CohereEmbeddings())
retriever = vectorstore.as_retriever()


### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)


### Answer question ###
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [26]:
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Task Decomposition is a technique used to break down complex tasks into smaller, simpler steps. This is done by instructing a model to "think step by step" and utilize more test-time computation to decompose hard tasks into manageable tasks.'

In [27]:
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'Common ways of doing Task Decomposition include using simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", or task-specific instructions, such as "Write a story outline." for writing a novel, or even human inputs.'

# Agents

Agents leverage the reasoning capabilities of LLMs to make decisions during execution. Using agents allow you to offload some discretion over the retrieval process. Although their behavior is less predictable than chains, they offer some advantages in this context:

- Agents generate the input to the retriever directly, without necessarily needing us to explicitly build in contextualization, as we did above;
- Agents can execute multiple retrieval steps in service of a query, or refrain from executing a retrieval step altogether (e.g., in response to a generic greeting from a user).

## Retrieval tool

In [28]:
from langchain.tools.retriever import create_retriever_tool

tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]

In [29]:
tool.invoke("task decomposition")

'Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\n\nFig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by st

## Agent constructor

In [30]:
from langgraph.prebuilt import create_react_agent

agent_executor = create_react_agent(llm, tools)

In [31]:
query = "What is Task Decomposition?"
try:
    # Attempt to stream the messages from the agent_executor
    for s in agent_executor.stream({"messages": [HumanMessage(content=query)]}):
        print(s)    # Print each message from the stream
        print("----")  # Print separator for clarity
except Exception as e:  # Handle any exception that occurs
    print("*"*25)
    print(e)  # Print the exception message

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_n18w', 'function': {'arguments': '{"query":"What is task decomposition?"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 72, 'prompt_tokens': 951, 'total_tokens': 1023, 'completion_time': 0.057928367, 'prompt_time': 0.154055392, 'queue_time': None, 'total_time': 0.21198375900000002}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_6a6771ae9c', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-29c98e64-c9e5-4d21-84d2-6714c4eab611-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'What is task decomposition?'}, 'id': 'call_n18w'}], usage_metadata={'input_tokens': 951, 'output_tokens': 72, 'total_tokens': 1023})]}}
----
{'tools': {'messages': [ToolMessage(content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An 

In [32]:
from langgraph.checkpoint.sqlite import SqliteSaver

memory = SqliteSaver.from_conn_string(":memory:")

agent_executor = create_react_agent(llm, tools, checkpointer=memory)

In [33]:
config = {"configurable": {"thread_id": "abc123"}}

for s in agent_executor.stream({"messages": [HumanMessage(content="Hi! I'm bob")]}, config=config):
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='Hi Bob!', response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 946, 'total_tokens': 950, 'completion_time': 0.002406277, 'prompt_time': 0.141447366, 'queue_time': None, 'total_time': 0.143853643}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_af05557ca2', 'finish_reason': 'stop', 'logprobs': None}, id='run-0754fade-d0a0-4548-acd3-9112c90dd8a4-0', usage_metadata={'input_tokens': 946, 'output_tokens': 4, 'total_tokens': 950})]}}
----


In [34]:
query = "What according to the blog post are common ways of doing it? redo the search"

for s in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]}, config=config
):
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_0cac', 'function': {'arguments': '{"query":"common ways of doing it"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 80, 'prompt_tokens': 954, 'total_tokens': 1034, 'completion_time': 0.062800286, 'prompt_time': 0.217456711, 'queue_time': None, 'total_time': 0.280256997}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-3744f83c-055f-423e-b123-403523e2c4e6-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'common ways of doing it'}, 'id': 'call_0cac'}], usage_metadata={'input_tokens': 954, 'output_tokens': 80, 'total_tokens': 1034})]}}
----
{'tools': {'messages': [ToolMessage(content='Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020)\nCheck more MIPS algorithms and performance comparison in

In [23]:
query = "What is Task Decomposition?"
try:
    for s in agent_executor.stream({"messages": [HumanMessage(content=query)]}, config=config):
        print(s)
        print("----")
except Exception as e:  # Handle any exception that occurs
    print("*"*25)
    print(e)  # Print the exception message

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_adgw', 'function': {'arguments': '{"query":"task decomposition"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 2049, 'total_tokens': 2086, 'completion_time': 0.028903229, 'prompt_time': 0.352043083, 'queue_time': None, 'total_time': 0.38094631199999995}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_873a560973', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-87133322-51f8-4c52-9d7a-2cb50ae6a976-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'task decomposition'}, 'id': 'call_adgw'}], usage_metadata={'input_tokens': 2049, 'output_tokens': 37, 'total_tokens': 2086})]}}
----
{'tools': {'messages': [ToolMessage(content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to k

# Complete code

In [35]:
import bs4
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools.retriever import create_retriever_tool
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
#from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_cohere import CohereEmbeddings
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.prebuilt import create_react_agent
import os
import json

memory = SqliteSaver.from_conn_string(":memory:")
#llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
llm = ChatGroq(model="llama3-8b-8192",temperature=0)


### Construct retriever ###
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=CohereEmbeddings())
retriever = vectorstore.as_retriever()


### Build retriever tool ###
tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]


agent_executor = create_react_agent(llm, tools, checkpointer=memory)

In [36]:
query = "What according to the blog post are common ways of doing it? redo the search"

for s in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]}, config=config
):
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_2rek', 'function': {'arguments': '{"query":"common ways of doing it"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 73, 'prompt_tokens': 936, 'total_tokens': 1009, 'completion_time': 0.057310101, 'prompt_time': 0.140738811, 'queue_time': None, 'total_time': 0.198048912}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_873a560973', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-3b6d27e6-7ebc-4b16-a171-c47d707a61f3-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'common ways of doing it'}, 'id': 'call_2rek'}], usage_metadata={'input_tokens': 936, 'output_tokens': 73, 'total_tokens': 1009})]}}
----
{'tools': {'messages': [ToolMessage(content='Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020)\nCheck more MIPS algorithms and performance comparison in

In [37]:
agent_executor.dict()

{'name': 'LangGraph',
 'nodes': {'__start__': {'name': None,
   'bound': {'name': None, 'input_type': None, 'func': None, 'afunc': None},
   'kwargs': {},
   'config': {'tags': ['langsmith:hidden']},
   'config_factories': [],
   'custom_input_type': None,
   'custom_output_type': None,
   'channels': ['__start__'],
   'triggers': ['__start__'],
   'mapper': None,
   'writers': [ChannelWrite<messages>(recurse=True, writes=[ChannelWriteEntry(channel='messages', value=<object object at 0x75be5c9dbc60>, skip_none=False, mapper=_get_state_key(recurse=False))], require_at_least_one_of=['messages']),
    ChannelWrite<start:agent>(recurse=True, writes=[ChannelWriteEntry(channel='start:agent', value='__start__', skip_none=False, mapper=None)], require_at_least_one_of=None)]},
  'agent': {'name': None,
   'bound': RunnableLambda(call_model),
   'kwargs': {},
   'config': {'tags': []},
   'config_factories': [],
   'custom_input_type': None,
   'custom_output_type': None,
   'channels': {'messag