In [1]:
# Let's expand on the basic RAG system we developed from scratch

In [2]:
from dotenv import load_dotenv
load_dotenv() # load environment variables

True

In [3]:
# Do all the setup here

import bs4
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM

# Fetch blog content
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split content into chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50) # uses tiktoken before splitting
splits = text_splitter.split_documents(blog_docs)

# Index (embed and store) the chunks into a vector db
vectorstore = Chroma.from_documents(documents=splits, embedding=OllamaEmbeddings(model="nomic-embed-text"))
retriever = vectorstore.as_retriever()

# Start llama3.2 model
llm = OllamaLLM(model="llama3.2:1b")

In [12]:
# Define some reusable prompts & queries

# User query
query = "What is task decomposition for LLM agents?"
query_cot = "What are the main components of an LLM-powered autonomous agent system?"

# RAG template
rag_template = PromptTemplate.from_template('''
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Context: {context}
    Question: {question}
    Answer:
''')

### Problem Statement

User queries are a challenge: _If a user provides an ambiguous query, they'll get ambiguous matches._

LLMs just follow what was in the context and hallucinate answers as a result.

#### Example

User query:
- "Jaguar speed"

Problem:
- "Jaguar" could refer to the animal or the car.
- "Speed" could mean top speed, acceleration, or agility.

Potential bad outcome:
- Retrieval returns mixed results (e.g., car specs and animal facts).
- LLM hallucinates by combining facts:
    - “The Jaguar can reach 150 mph in the wild.”

### Strategy #1: Multi-Query

![Image](rsc/jupyter/multi-query.png)

**Idea**: Instead of relying on just one query, let's generate _multiple semantically different queries_ that represent various plausible interpretations or reformulations of the original user question.

In [4]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate

# Multi Query: Different Perspectives
template_multi = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions as a numbered list. Don't include text before or after
this list. Original question: {question}"""
prompt_multi_query = PromptTemplate.from_template(template_multi)

generate_queries_multi = (
    prompt_multi_query
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

multi_queries = generate_queries_multi.invoke(query)
print(len(multi_queries))
print(multi_queries)

5
['1. How do I decompose tasks into smaller sub-tasks using large language models?', '2. What are the steps involved in breaking down complex tasks using transformer-based language models like LLMs?', '3. In natural language processing, how does task decomposition help in modeling and understanding multi-step processes?', '4. Can you explain task decomposition for LLM agents in terms of their information retrieval capabilities?', '5. What is the theoretical framework behind task decomposition and its relevance to information retrieval tasks?']


In [5]:
retrieval_chain_multi = generate_queries_multi | retriever.map()

multi_retrievals = retrieval_chain_multi.invoke(query)
print(len(multi_retrievals))  # 5 questions
print(len(multi_retrievals[0]))  # 4 documents per question

5
4


In [6]:
from operator import itemgetter

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [doc.page_content for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return unique_docs

rag_chain_multi = (
    {
        "context": retrieval_chain_multi | get_unique_union,
        "question": itemgetter("question")
    }
    | rag_template
    | llm
    | StrOutputParser()
)

rag_chain_multi.invoke({"question": query})

'Task decomposition for Large Language Model (LLM) agents involves breaking down complex tasks into smaller, manageable parts or "thought steps" to enable effective execution and planning. This process transforms big tasks into multiple manageable tasks that can be explored by the model using techniques such as chain of thought (CoT) and tree of thoughts (Yao et al., 2022). The goal is to identify the necessary subgoals for achieving a specific task, allowing LLM agents to break down complex problems into more manageable parts.'

### Strategy #2: RAG-Fusion

![Image](rsc/jupyter/rag-fusion.png)

**Idea**: Extension of Multi-Query - we not only generate multiple semantically different queries, but also combine (_fuse_) the results by _ranking and removing duplicates_ before passing them to the language model.

In [7]:
# RAG-Fusion: Related
template_fusion = """You are a helpful assistant that generates multiple search queries based on a single input query. Provide these queries as a numbered list. Don't include text before or after this list. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_fusion = PromptTemplate.from_template(template_fusion)

generate_queries_fusion = (
    prompt_fusion
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

fusion_queries = generate_queries_fusion.invoke(query)
print(len(fusion_queries))
print(fusion_queries)

4
['1"What are the steps involved in task decomposition for large language models?"', '2"Can you explain task decomposition algorithms used by LLMs?"', '3"What\'s the purpose of task decomposition in natural language processing?"', '4"How do large language models achieve task decomposition?"']


In [8]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents
        and an optional parameter k used in the RRF formula """

    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

retrieval_chain_fusion = generate_queries_fusion | retriever.map() | reciprocal_rank_fusion

docs = retrieval_chain_fusion.invoke({"question": query})
len(docs)

  (loads(doc), score)


9

In [9]:
rag_chain_fusion = (
    {
        "context": retrieval_chain_fusion,
        "question": itemgetter("question")
    }
    | rag_template
    | llm
    | StrOutputParser()
)

rag_chain_fusion.invoke({"question": query})

"Task decomposition for LLM (Large Language Models) agents involves breaking down complex tasks into smaller, manageable steps to improve their performance and reliability. This process typically involves the following stages:\n\n1. **Task planning**: The agent identifies the task and its dependencies.\n2. **Instruction selection**: The agent selects an appropriate instruction or model based on the user's request and the call command.\n3. **Model execution**: The selected model executes on the specific task, generating results and logs.\n\nThis process is crucial for LLM agents to achieve optimal planning proficiency and reliability."

### Strategy #3: Decomposition

![Image](rsc/jupyter/decomposition.png)

**Idea**: Break down a complex query into simpler, more specific sub-queries that each target a different facet of the information need.

_Note: This is especially helpful in multi-hop questions or those requiring reasoning over multiple pieces of information._

In [15]:
template_decomposition_generate = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Provide these sub-questions as a numbered list. Don't include text before or after this list. \n

Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition_generate = PromptTemplate.from_template(template_decomposition_generate)

generate_queries_decomposition = (
    prompt_decomposition_generate
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

decomposition_queries = generate_queries_decomposition.invoke(query_cot)
print(len(decomposition_queries))
print(decomposition_queries)

3
['1. What are the primary architecture components of a Large Language Model (LLM) powered autonomous agent?', '2. How do different component types contribute to the overall performance and functionality of an LLM-based autonomous agent?', '3. What are some key technologies that enable the integration of AI models like LLMs into autonomous agent systems?']


In [21]:
template_decomposition_cot = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {qa_pairs} \n --- \n

Here is additional context relevant to the question:

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

prompt_decomposition_cot = PromptTemplate.from_template(template_decomposition_cot)

rag_chain_decomposition = (
    {
        "context": itemgetter("question") | retriever.map(),
        "question": itemgetter("question"),
        "qa_pairs": itemgetter("qa_pairs")
    }
    | prompt_decomposition_cot
    | llm
    | StrOutputParser()
)

def format_qa_pair(question, answer):
    """Format Q and A pair"""

    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

qa_pairs = ""
for question in decomposition_queries:
    answer = rag_chain_decomposition.invoke({"question": question, "qa_pairs": qa_pairs})  # sub-problem answers
    qa_pair = format_qa_pair(question, answer)
    qa_pairs = qa_pairs + "\n---\n" + qa_pair

answer = rag_chain_decomposition.invoke({"question": query_cot, "qa_pairs": qa_pairs})  # final answer to original question

print(answer)

Based on the provided context, here are the main components of an LLM (Large Language Model) powered autonomous agent system:

1. **External Memory**: The ability of an LLM to store information in external memory, such as vector stores or databases, which can be used for fast and efficient retrieval.
2. **Long-Term Memory (LTM)**: Long-term storage capacity that allows the agent to retain information for a remarkable long time, potentially ranging from a few days to decades.
3. **External APIs**: Access to external tools and platforms that enable the LLM-powered autonomous agent system to perform tasks autonomously, such as text processing, data analysis, or machine learning.
4. **Adaptability**: The ability of the LLM-powered autonomous agent system to adapt and learn from new experiences, environments, and tasks.
5. **Autonomy**: The capacity of the agent to operate independently and make decisions without human intervention.

These components work together to enable an LLM-powered a