In [1]:
from dotenv import load_dotenv
import os

## [Overview](markdown/overview.md)

### [Simple RAG query](markdown/simple-rag-query.md)

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import OllamaEmbeddings, ChatOllama

#### INDEXING ####

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

embedding_function = OllamaEmbeddings(model='mxbai-embed-large')

current_dir = os.getcwd() 
persistent_directory = os.path.join(current_dir, "db", "chroma_db")

# Embed
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=embedding_function, persist_directory=persistent_directory)

vectorstore.persist()


retriever = vectorstore.as_retriever()

#### RETRIEVAL and GENERATION ####

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
#llm = ChatOllama(model="qwen2.5:32b-instruct-q4_K_M")
llm = ChatOllama(model="llama3.1")

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is Task Decomposition?")

USER_AGENT environment variable not set, consider setting it to identify your requests.
  vectorstore.persist()


'Task Decomposition refers to breaking down a complex task into smaller, manageable steps. This is achieved by instructing the model to "think step by step" as part of the Chain of Thought (CoT) technique. CoT transforms big tasks into multiple simpler tasks, allowing for better interpretation of the model\'s thinking process.'

## Second Example
### In this instance we are using an already indexed vector db
### The 'chroma_db' persisted db contains chunks from a text file of the odyssey 
### We ask Who is Odysseus' wife?

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import OllamaEmbeddings, ChatOllama

embedding_function = OllamaEmbeddings(model='mxbai-embed-large')

# Get the current directory
current_dir = os.getcwd()

# Get the directory above the current directory
parent_dir = os.path.dirname(current_dir)

persistent_directory = os.path.join(parent_dir, "db", "chroma_db")

db = Chroma(persist_directory=persistent_directory,embedding_function=embedding_function)

bookretriever = db.as_retriever()

#### RETRIEVAL and GENERATION ####

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOllama(model="llama3.1")

# Chain
rag_chain = (
    # {"context": retriever | format_docs, "question": RunnablePassthrough()}
    {"context": bookretriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("Who is Odysseus' wife?")

'Odysseus\' wife is Penelope. She is described as a "very admirable woman" with an "excellent nature".'

## Part 3: Retrieval
In the following code sample we can restrict the number of documents returned by setting the {"k": 1} arguement - in this case just one

In [4]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

docs = retriever.get_relevant_documents("What is Task Decomposition?")

len(docs)

  docs = retriever.get_relevant_documents("What is Task Decomposition?")


1

## Generation
### Simple example without a retriever 
A Chat prompt is given and piped into the llm with the previosly created docs

In [5]:
from langchain.prompts import ChatPromptTemplate
# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})]


In [6]:
# LLM
llm = ChatOllama(model="llama3.1")

In [7]:
# Chain
chain = prompt | llm

In [8]:
# Run
chain.invoke({"context":docs,"question":"What is Task Decomposition?"})

AIMessage(content='Task Decomposition refers to breaking down a complicated task into many smaller, more manageable steps that an agent can plan ahead for. It involves transforming hard tasks into multiple simpler ones, allowing for better understanding and execution of complex processes. This technique was introduced in the Chain of Thought (CoT) method by Wei et al. in 2022, which enhances model performance on complex tasks by instructing models to "think step by step".', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-01-26T18:54:58.277997498Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1338909818, 'load_duration': 46819704, 'prompt_eval_count': 185, 'prompt_eval_duration': 24000000, 'eval_count': 88, 'eval_duration': 1266000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-5d580c5c-aaa7-427d-a003-ee2c3c7f9c78-0', usage_metadata={'input_tokens': 185, 'output_tokens': 88, 'total_tokens': 273})

### Using a prompt from langchain hub

In [9]:
from langchain import hub
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [10]:
print(prompt_hub_rag)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [11]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a planning technique used in complex task execution, which involves breaking down a difficult task into several smaller and more manageable tasks. This allows the agent to "think step by step" and utilize more test-time computation to achieve its goal.'

## [Query Transformations](markdown/query-transformations.md)

## [Part 5: Multi Query](markdown/multi-query.md)

In [12]:
from langchain.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

llm = ChatOllama(model="qwen2.5:32b-instruct-q4_K_M")

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)


In [13]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

1

In [14]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOllama(model="qwen2.5:32b-instruct-q4_K_M")

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition in the context of Large Language Model (LLM) agents refers to breaking down a complicated task into smaller, more manageable sub-tasks. This approach leverages the "Chain of Thought" (CoT) prompting technique, where the model is guided to think step by step through a complex problem. By decomposing tasks, the LLM can better manage and execute each part sequentially, which enhances its performance on challenging tasks. This method not only makes it easier for the agent to handle complexity but also provides insights into how the model processes information and arrives at solutions.'

## Second multi query example

In [15]:
from langchain.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama


llm = ChatOllama(model="llama3.1")

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "How did Odysseus get home?"
retrieval_chain = generate_queries | bookretriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

from operator import itemgetter

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOllama(model="llama3.1")

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})


'According to one of the documents (id=\'144dea67-f04b-40af-bfb8-b81a9bae24bc\'), it seems that Odysseus was helped by the Phaeacians, a group of people who live on an island. They took pity on him and brought him home.\n\nHowever, another document (id=\'f3f67c22-c154-4fe0-80f8-1a4e984151ba\') suggests that Odysseus was aided by the goddess Athena, who intervened on his behalf to help him return home. Specifically, it mentions that "the goddess, grey-eyed Athene, answered him" and implies that she played a role in facilitating his journey back to Ithaca.\n\nIt\'s worth noting that there may be some inconsistencies between different accounts of Odysseus\' journey, as the documents appear to be excerpts from various translations or interpretations of Homer\'s Odyssey.'

## [Part 6: RAG-Fusion](markdown/rank-fusion.md)

In [16]:
# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)
question = "What is task decomposition for LLM agents?"

In [17]:
from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_rag_fusion 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [18]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

6

In [19]:

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition for LLM (Large Language Model) agents involves breaking down complex tasks into simpler sub-tasks that can be processed by the model. According to the provided text, there are three ways to achieve this:\n\n1. Using simple prompting, such as "Steps for XYZ.\\n1." or "What are the subgoals for achieving XYZ?"\n2. By using task-specific instructions, e.g., "Write a story outline" for writing a novel.\n3. With human inputs.\n\nThese methods enable LLM agents to decompose tasks into manageable parts and tackle them in a structured manner.'

### Second Rank Fusion Example

In [20]:
question = "Who was the wife of Odysseus?"
retrieval_chain_rag_fusion = generate_queries | bookretriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

"According to the provided texts, the wife of Odysseus was Penelope, the daughter of Icarius. This is mentioned in several places, including:\n\n* In Document (id='7f8848b2-3aae-4a08-84f6-fe1804fb086c'), where Agamemnon's ghost praises Penelope for her virtue and constancy.\n* In Document (id='fd2d9e87-3d2b-43c3-8d18-2c109f9c0416'), where Alcinous apologizes to Odysseus for his daughter not bringing him on to the house, saying that Penelope did tell him to follow along with the maids."

## [Part 7: Decomposition](markdown/decomposition.md)

In [21]:
# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [22]:
llm = ChatOllama(model="llama3.1")
# Chain
generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})

In [23]:
print(questions)

['Here are three potential sub-queries related to the main question:', '', '1. **What are the key technologies and architectures that enable Large Language Model (LLM) integration in autonomous systems?**', '', 'This query could help break down the input into a specific area of focus, such as the technical aspects of LLM-powered autonomous agents.', '', '2. **How do LLMs interact with other components of an autonomous system, such as sensors, actuators, and decision-making modules?**', '', 'This sub-question explores the interfaces and interdependencies between LLMs and other parts of the system, which could be a crucial aspect of designing and implementing LLM-powered autonomous agents.', '', '3. **What are the specific skills and capabilities that can be achieved through LLM-based reasoning and problem-solving in an autonomous agent system?**', '', 'This query delves into the potential applications and benefits of using LLMs in autonomous systems, such as improved decision-making, na

Answer recursively

In [24]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [25]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Format Q and A pair"""
    
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

llm = ChatOllama(model="llama3.1")
q_a_pairs = ""
for q in questions:
    
    rag_chain = (
    {"context": itemgetter("question") | retriever, 
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")} 
    | decomposition_prompt
    | llm
    | StrOutputParser())

    answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

In [26]:
print(answer)

To address this question, we'll focus on the capabilities of LLMs in autonomous systems.

The specific skills and capabilities that can be achieved through LLM-based reasoning and problem-solving in an autonomous agent system include:

1.  **Planning**: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
2.  **Subgoal and Decomposition**: The agent identifies potential hazards (e.g., pedestrians, potholes) and predicts the vehicle's trajectory based on processed information from sensors.
3.  **Reflection and Refinement**: The agent can do self-criticism and self-reflection over past actions, learn from mistakes, and refine them for future steps, thereby improving the quality of final results.

These capabilities enable autonomous systems to perform complex tasks in real-world scenarios by integrating LLMs with other system components.

To answer the question directly:

This query delves into the potential applications and b

### second decomposition example

In [29]:
# Run
question = "What happens during Odysseus's encounter with Polyphemus?"
questions = generate_queries_decomposition.invoke({"question":question})

q_a_pairs = ""
for q in questions:
    
    rag_chain = (
    {"context": itemgetter("question") | bookretriever, 
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")} 
    | decomposition_prompt
    | llm
    | StrOutputParser())

    answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

In [30]:
print(answer)

To answer this question, I will combine the context provided with background information and relevant question + answer pairs.

Odysseus employs a series of clever tactics to outwit Polyphemus. One such tactic is his use of deception to disguise himself and escape from the cave. When Polyphemus asks him for his name, Odysseus replies "No man" (Noman). This ruse allows Odysseus to avoid being blinded by Polyphemus, who believes that no one has harmed him.

Another tactic employed by Odysseus is to have Philoetius and Melanthius stand guard at the door of the lair. When Agelaus suggests that someone go up to the trap door to inform the people what was happening, Melanthius points out the danger of being attacked by a single brave man who could prevent any number from getting in.

In addition, Odysseus uses his wit and cunning to escape from Polyphemus's lair. When the Cyclops attempts to block his exit, Odysseus blinds him with a heated stake. This allows Odysseus to escape, but it also 