In [13]:
# Let's expand on the basic RAG system we developed from scratch

In [14]:
from dotenv import load_dotenv
load_dotenv() # load environment variables; the cell displays the call's return value

True

In [15]:
# Do all the setup here

import bs4
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM

# Load: fetch the blog post, parsing only the title, header and body elements
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)
blog_docs = loader.load()

# Split: chunk_size / chunk_overlap are handed to the tiktoken-based splitter constructor
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(blog_docs)

# Index: embed every chunk and store it in a Chroma vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

# Generation model (llama3.2, 1b variant) accessed through the Ollama integration
llm = OllamaLLM(model="llama3.2:1b")

# Example question reused by the cells below
query = "What is task decomposition for LLM agents?"

# Answer prompt: {context} and {question} are filled in when the chain is invoked
rag_template = PromptTemplate.from_template('''
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Context: {context}
    Question: {question}
    Answer:
''')

### Problem Statement

User queries are a challenge: _If a user provides an ambiguous query, they'll get ambiguous matches._

LLMs simply follow whatever is in the retrieved context, so poorly matched context leads them to hallucinate answers.

### Strategy #1: Multi-Query

![Image](rsc/jupyter/multi-query.png)

**Motivation**: A single query may not capture all the nuances of an information need. It might:
- Use suboptimal phrasing.
- Miss alternative wordings or interpretations.
- Retrieve documents from only one angle or perspective.

**Idea**: Instead of relying on just one query, let's generate _multiple semantically different queries_ that represent various plausible interpretations or reformulations of the original user question.

In [16]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate

# Multi Query: Different Perspectives
# Prompt that asks the LLM to rewrite the user question as five alternatives,
# returned as a numbered list with one question per line.
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions as a numbered list. Don't include text before or after
this list. Original question: {question}"""
prompt_perspectives = PromptTemplate.from_template(template)


def parse_queries(llm_output: str) -> list[str]:
    """Split the LLM's list output into one query per non-blank line.

    Args:
        llm_output: Raw text produced by the LLM for the multi-query prompt.

    Returns:
        The stripped, non-empty lines of ``llm_output`` in their original order.
        Blank lines (e.g. when the model separates list items with an empty
        line) are dropped so they are never sent to the retriever as queries.
    """
    stripped_lines = (line.strip() for line in llm_output.split("\n"))
    return [line for line in stripped_lines if line]


# prompt -> LLM -> plain string -> list of alternative queries
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | parse_queries
)

multi_queries = generate_queries.invoke(query)
print(len(multi_queries))
print(multi_queries)

5
['1. Task decomposition in language models refers to the process of breaking down complex tasks into smaller, more manageable sub-tasks, allowing for improved efficiency and effectiveness.', '2. Can you explain how task decomposition affects the performance of large language model (LLM) agents, particularly in terms of resource allocation and learning strategies?', "3. What are some common techniques used in task decomposition for LLMs, such as hierarchical or graph-based approaches, and how do they impact the model's ability to generalize?", '4. How does task decomposition compare to other methods like feature extraction or attribute selection in terms of achieving better performance on downstream tasks like language translation or question answering?', '5. Can you provide an example of a specific problem where task decomposition is applied for LLMs, such as sentiment analysis or text classification, and discuss the benefits of using this approach?']


In [17]:
# Apply the retriever to every generated query: one result list per query
retrieve_per_query = retriever.map()
retrieval_chain = generate_queries | retrieve_per_query

multi_retrievals = retrieval_chain.invoke(query)
first_query_hits = multi_retrievals[0]
print(len(multi_retrievals))  # number of generated queries
print(len(first_query_hits))  # number of documents retrieved for the first query

5
4


In [21]:
from operator import itemgetter

def get_unique_union(documents: list[list]):
    """Return the unique page contents across all retrieved document lists.

    Args:
        documents: One list of retrieved documents per query. Every document
            must expose a ``page_content`` attribute.

    Returns:
        A flat list of distinct ``page_content`` values in first-seen order
        (iterating the outer list, then each inner list). Empty input yields
        an empty list.
    """
    # Flatten list of lists, keeping only each document's text
    flattened_docs = (doc.page_content for sublist in documents for doc in sublist)
    # dict.fromkeys drops duplicates while preserving insertion order, so the
    # result is deterministic across runs (set() ordering of strings is not).
    return list(dict.fromkeys(flattened_docs))

# Multi-query RAG chain: the input dict {"question": ...} feeds both branches below,
# then the filled prompt goes to the LLM and the reply is parsed to a plain string.
final_rag_chain = (
    {
        # Join the unique chunks into plain text. Passing the list itself would make
        # the prompt render its Python repr (brackets, quotes, escaped newlines).
        "context": retrieval_chain | get_unique_union | (lambda chunks: "\n\n".join(chunks)),
        "question": itemgetter("question")
    }
    | rag_template
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": query})

'Task decomposition for LLM (Large Language Model) agents involves breaking down complex tasks into smaller and simpler steps to enable them to learn from past mistakes and improve their performance over time. This process can be done using various techniques, including chain of thought (CoT), tree of thoughts (Yao et al., 2022), or using human inputs (Task-specific instructions).'