# Query Translation

In [2]:
import bs4
from langchain import hub 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv

# Load environment variables from the local .env file before any client is created.
# NOTE(review): presumably this supplies the OpenAI API key used by the cells below - confirm.
# load_dotenv is the last expression, so its return value is shown as the cell output.
path = ".env"
load_dotenv(dotenv_path=path)
  

USER_AGENT environment variable not set, consider setting it to identify your requests.


True

In [None]:
# Indexing pipeline: fetch the blog post, chunk it, embed the chunks, expose a retriever.

# Parse only the HTML elements carrying these CSS classes.
article_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": article_sections},
)
doc = loader.load()

# Chunk the loaded document into overlapping pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split = text_splitter.split_documents(doc)

# Embed the chunks and store them in a Chroma vector store.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=split, embedding=embedding_model)

# Retriever used by every chain further down.
retriever = vectorstore.as_retriever()

### Prompt

In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda

# Prompt asking the model for five newline-separated rewordings of the user question.
template="""You are an AI language model assistant. Your task is to generate exactly five 
different versions of the given user question to retrieve relevant documents from a
vector database. By generating multiple perspectives on the user question, your goal is to help
overcome limitations of distance-based similarity search.

Follow these output rules:
- Generate EXACTLY five alternative questions
- Use ONLY newlines as separators (no numbers, bullets, or extra text)
- Do not include any additional explanations

Original question: {question}"""

prompt_perspective = ChatPromptTemplate.from_template(template)

# question -> prompt -> LLM -> raw text -> list of query strings.
# Blank or whitespace-only lines are dropped so that an empty string is never
# sent to the retriever when the model separates its queries with empty lines.
generate_queries = (
    prompt_perspective
    | ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    | StrOutputParser()
    | RunnableLambda(
        lambda text: [line.strip() for line in text.split("\n") if line.strip()]
    )
)

In [16]:
from langchain.load import dumps, loads
from typing import List

def get_unique_union(documents: List[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Args:
        documents: One list of retrieved documents per generated query.

    Returns:
        A flat list containing each distinct document once, in the order in
        which it was first seen.
    """
    # Serialise every document to a string so it can be hashed and compared.
    flatten_docs = [dumps(doc) for sublist in documents for doc in sublist]

    # dict.fromkeys de-duplicates while keeping insertion order. The previous
    # list(set(...)) returned the documents in an arbitrary order that could
    # differ between runs.
    unique_docs = list(dict.fromkeys(flatten_docs))

    # Turn the serialised strings back into document objects.
    return [loads(doc) for doc in unique_docs]

# Retrieve: expand the question into several queries, run the retriever on each
# query, then merge the per-query results into a single de-duplicated list.
question = "What is the decomposition for LLM agents?"
retrieval_chain = (
    generate_queries
    | retriever.map()
    | get_unique_union
)
docs = retrieval_chain.invoke({"question": question})
len(docs)

7

## RAG

### Multi-Query

![multi query RAG](../static/images/Multi-Query.png)

In [19]:
from langchain_openai import ChatOpenAI
from operator import itemgetter 

# RAG: answer the question from the documents returned by the multi-query retrieval chain.
template = """Answer the following question based on the context.
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)

# Both prompt variables are derived from the same input dict:
# "context" runs the retrieval chain, "question" is passed through unchanged.
multi_query_inputs = {
    "context": retrieval_chain,
    "question": itemgetter("question"),
}
final_rag_chain = multi_query_inputs | prompt | llm | StrOutputParser()

final_rag_chain.invoke({"question": question})
   

'The decomposition for LLM agents includes breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.'

### RAG Fusion

![RAG-Fusion](../static/images/RAG-Fusion.png)

In [32]:
from langchain_core.prompts import ChatPromptTemplate

# RAG-Fusion: prompt that asks the model for four search queries derived from {question}.
# The string is not raw, so each "\n" escape is a real newline in addition to the
# physical line break that follows it.
template="""You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search query related to: {question} \n
Output (4 queries):
"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template) 

In [33]:
# question -> RAG-Fusion prompt -> LLM -> raw text -> list of query strings.
# Blank or whitespace-only lines are dropped so that an empty string is never
# sent to the retriever when the model pads its output with empty lines.
generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | RunnableLambda(
        lambda text: [line.strip() for line in text.split("\n") if line.strip()]
    )
)

In [34]:
from langchain.load import dumps, loads

def reciporcal_rank_fusion(results: list[list], k = 60):
    '''Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

    Every occurrence of a document at 0-based position ``rank`` in one of the
    lists adds ``1 / (rank + k)`` to that document's fused score.

    Args:
        results: Ranked lists of documents, one list per query.
        k: Constant added to the rank in the RRF formula.

    Returns:
        A list of ``(document, score)`` tuples sorted by descending fused score.
    '''
    # Fused score per unique document, keyed by the serialised document.
    fused_score = {}

    for docs in results:
        # enumerate yields the document's position (rank) within its own list.
        for rank, doc in enumerate(docs):
            # Documents are serialised to strings so they can be used as dict keys.
            doc_str = dumps(doc)
            # A document seen for the first time starts from a score of 0.
            fused_score[doc_str] = fused_score.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first.
    ranked = sorted(fused_score.items(), key=lambda item: item[1], reverse=True)

    # Deserialise the keys back into document objects.
    return [(loads(doc_str), score) for doc_str, score in ranked]

In [35]:
# Expand the question into several queries, retrieve for each query, then fuse
# the per-query rankings into one list of (document, score) pairs.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciporcal_rank_fusion
)

docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

7

In [None]:

# RAG: answer the question from the fused, re-ranked retrieval results.
template="""Answer the following question based on this context: {context}
    Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# "context" runs the RAG-Fusion retrieval chain; "question" is passed through unchanged.
fusion_inputs = {
    "context": retrieval_chain_rag_fusion,
    "question": itemgetter("question"),
}
final_rag_chain = fusion_inputs | prompt | llm | StrOutputParser()

final_rag_chain.invoke({"question": question})

'The decomposition for LLM agents includes breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. Additionally, the agent can do self-criticism and self-reflection over past actions, learn from mistakes, and refine them for future steps to improve the quality of final results.'

## Decomposition 

In [38]:
# Decomposition: prompt that asks the model to break {question} into three
# sub-questions that can each be answered in isolation.
# The string is not raw, so each "\n" escape is a real newline in addition to the
# physical line break that follows it.
template="""You are a helpful assistant that generate multiple sub-questions related to input question.\n
The goal is to break down the input in a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to question : {question} \n
Output (3 queries):       
"""

prompt_decomposition = ChatPromptTemplate.from_template(template)

In [39]:
# Chain: question -> decomposition prompt -> LLM -> raw text -> list of sub-questions.
# Blank or whitespace-only lines are dropped so that no empty sub-question is
# passed on to retrieval and answering.
generated_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | RunnableLambda(
        lambda text: [line.strip() for line in text.split("\n") if line.strip()]
    )
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system"

questions = generated_queries_decomposition.invoke({"question": question})

In [40]:
# Display the sub-questions produced by the decomposition chain.
questions

['1. What is LLM technology and how is it used in autonomous agent systems?',
 '2. What are the key components of an autonomous agent system?',
 '3. How does an LLM-powered autonomous agent system differ from other types of autonomous systems?']

![Decomposition](../static/images/decomposition.png) 

Papers:
- https://arxiv.org/pdf/2205.10625.pdf
- https://arxiv.org/pdf/2212.10509.pdf

#### Answer recursively 

In [46]:
# Prompt for answering one sub-question. It takes three variables:
#   {question}  - the sub-question to answer (inserted twice),
#   {q_a_pairs} - background question/answer pairs,
#   {context}   - additional context for the question.
template = """Here is the question you need to answer:

    \n --- \n {question} \n --- \n
    
    Here is any any available background question + answer pairs:
    
    \n --- \n {q_a_pairs} \n --- \n
    
    Here is additional context relevant to question:
    
    \n --- \n {context} \n --- \n     
    Use the above context and any background questions + answer pairs to answer the question: \n {question}    
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [47]:
from operator import itemgetter

def format_qa_pair(question, answer):
    """Render a single question/answer pair as text, without surrounding whitespace."""
    rendered = f"Question: {question} \n Answer: {answer}\n\n"
    return rendered.strip()

llm = ChatOpenAI(temperature=0)

# The chain is identical for every sub-question, so it is built once, outside the loop.
# "context" is retrieved for the current sub-question; "q_a_pairs" carries the
# question/answer pairs produced for the earlier sub-questions.
rag_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

q_a_pairs = ""

# Answer the sub-questions in order, feeding every earlier answer into the next prompt.
for q in questions:
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})

    # Append only the new pair to the running history. Previously the history
    # was overwritten with the latest pair and that pair was then concatenated
    # with itself, so earlier answers never reached the later prompts.
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n --- \n" + q_a_pair

In [49]:
# `answer` holds the result of the last loop iteration, i.e. the answer to the final sub-question.
print(answer)

An LLM-powered autonomous agent system differs from other types of autonomous systems in that it utilizes a large language model (LLM) as its core controller, serving as the agent's brain. This system is complemented by key components such as planning, which involves breaking down tasks into subgoals and reflecting on past actions for refinement. Additionally, the system relies on natural language interfaces for communication with external components like memory and tools. The use of LLMs in autonomous agents allows for powerful problem-solving capabilities beyond just generating text, stories, or programs.


#### Answering each sub-question individually

![decomposition individually](../static/images/decomposition_indivisually.png)

In [52]:
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# RAG prompt pulled from the LangChain hub by its identifier.
# NOTE(review): the template text is not visible in this notebook; it is invoked
# below with "context" and "question" keys.
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """Answer every sub-question of ``question`` with its own retrieve-then-generate step.

    Relies on the notebook-level ``retriever`` and ``llm`` objects.

    Args:
        question: The original user question.
        prompt_rag: Prompt that is invoked with ``context`` and ``question`` keys.
        sub_question_generator_chain: Runnable that, invoked with
            ``{"question": question}``, returns the sub-questions as strings.

    Returns:
        A ``(answers, sub_questions)`` tuple of two lists of equal length, where
        ``answers[i]`` is the answer generated for ``sub_questions[i]``.
    """
    # Generate the sub-questions; blank entries are skipped so that no empty
    # query is retrieved for or answered.
    sub_questions = [
        sub_question
        for sub_question in sub_question_generator_chain.invoke({"question": question})
        if sub_question.strip()
    ]

    # The answering chain does not depend on the sub-question, so build it once.
    answer_chain = prompt_rag | llm | StrOutputParser()

    rag_result = []
    for sub_question in sub_questions:
        # `invoke` replaces the deprecated `get_relevant_documents`.
        retrieved_docs = retriever.invoke(sub_question)

        # Answer the sub-question from the documents retrieved for it.
        answer = answer_chain.invoke({"context": retrieved_docs, "question": sub_question})
        rag_result.append(answer)

    return rag_result, sub_questions

# Call the helper directly (it is not wrapped in a RunnableLambda here).
# It returns the answers together with the sub-questions they belong to;
# note that `questions` is rebound to the newly generated sub-questions.
answers, questions = retrieve_and_rag(question, prompt_rag, generated_queries_decomposition) 

In [55]:
def format_qa_pairs(questions, answers):
    """Render numbered question/answer pairs as one text block.

    Pairs are numbered from 1 and separated by a blank line; surrounding
    whitespace is removed from the final string.
    """
    numbered_pairs = enumerate(zip(questions, answers), start=1)
    rendered = [
        f"Question {number}: {asked}\nAnswer {number}: {reply}"
        for number, (asked, reply) in numbered_pairs
    ]
    return "\n\n".join(rendered).strip()
    
# Build the synthesis context from the sub-questions and their individual answers.
context = format_qa_pairs(questions, answers)

# Print so the newlines in the formatted text are rendered.
print(context)

Question 1: 1. What is LLM technology and how does it work in autonomous agent systems?
Answer 1: LLM technology refers to using large language models as the core controller in autonomous agent systems. In these systems, LLM functions as the agent's brain, enabling tasks such as planning, subgoal decomposition, reflection, refinement, and memory utilization. The reliability of natural language interfaces between LLMs and external components is a current challenge due to potential formatting errors and rebellious behavior.

Question 2: 2. What are the key components of an autonomous agent system?
Answer 2: The key components of an autonomous agent system include planning with subgoal decomposition, reflection and refinement, and memory. These components enable the agent to break down tasks, learn from past actions, and improve future results. The system is powered by a large language model acting as the agent's brain.

Question 3: 3. How does LLM technology enhance the performance of au

In [57]:
# Prompt: synthesise the final answer from the sub-question Q&A pairs.
template="""Here is set of Q&A pairs: 

{context}
Use these to synthesize an answer to the question: {question} 
"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain2 = (
    prompt
    | llm
    | StrOutputParser()
)

# `context` is the formatted Q&A text; `question` is the original question.
synthesis_inputs = {"question": question, "context": context}
result = final_rag_chain2.invoke(synthesis_inputs)

In [58]:
# Show the answer synthesised from the individual sub-question answers.
print(result)

The main components of an LLM-powered autonomous agent system include the large language model (LLM) acting as the agent's brain, planning with subgoal decomposition, reflection and refinement, and memory utilization. These components work together to enable the agent to efficiently handle complex tasks, learn from past actions, and improve future performance. The LLM technology enhances the agent's ability to autonomously design, plan, and execute tasks by leveraging natural language interfaces, browsing the Internet, and executing code.


## Step-Back Prompting

In [61]:
# Few Shot example
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Each example pairs a specific question ("input") with a more generic
# step-back question ("output").
example = [
    {"input": "Could the member of The Police perform lawful arrest?", "output": "What can the member of Police do?"},
    {"input": "Anakin Skywalker was born on which planet?", "output": "What is Anakin Skywalker's personal history?"},
]

# Every example is rendered as a human turn followed by the AI reply.
example_prompt = ChatPromptTemplate.from_messages([("human", "{input}"), ("ai", "{output}")])

few_shot_prompt = FewShotChatMessagePromptTemplate(
    examples=example,
    example_prompt=example_prompt,
)

system_message = (
    "system",
    """You are an expert at world knowledge. Your task is to step back and paraphrase
            a question to a more generic step-back question. """,
)

# Message order: system instruction, few-shot examples, then the new question.
prompt = ChatPromptTemplate.from_messages(
    [system_message, few_shot_prompt, ("user", "{question}")]
)

prompt

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are an expert at world knowledge. Your task is to step back and paraphrase\n            a question to a more generic step-back question. '), additional_kwargs={}), FewShotChatMessagePromptTemplate(examples=[{'input': 'Could the member of The Police perform lawful arrest?', 'output': 'What can the member of Police do?'}, {'input': 'Anakin Skywalker was born on which planet?', 'output': "What is Anakin Skywalker's personal history?"}], input_variables=[], input_types={}, partial_variables={}, example_prompt=ChatPromptTemplate(input_variables=['input', 'output'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}), AIMessag

In [62]:
# Chain that turns a question into its step-back version: few-shot prompt -> LLM -> plain string.
generate_queries_stepback = prompt | llm | StrOutputParser() 
# `question` is rebound here and reused by the response chain in the next cell.
question = "What is task decomposition?"
generate_queries_stepback.invoke({"question": question})

'What is the process of breaking down tasks into smaller components?'

In [65]:
# Response prompt: combines the context retrieved for the original question
# with the context retrieved for its step-back version.
response_prompt_template = """You are an expert of world knowledge. 
I am going to ask you a question. Your response should be comprehensive and based on the following context

# {normal_context}
# {step_back_context}

# Original question : {question}
# Answer: 
"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

# Extracts the original question from the chain input.
pick_question = RunnableLambda(lambda inputs: inputs["question"])

chain = (
    {
        # Context retrieved with the question as asked.
        "normal_context": pick_question | retriever,
        # Context retrieved with the generated step-back question.
        "step_back_context": generate_queries_stepback | retriever,
        # The question itself, passed through to the prompt.
        "question": pick_question,
    }
    | response_prompt
    | llm
    | StrOutputParser()
)

chain.invoke({"question": question})

'Task decomposition is a crucial concept in the field of artificial intelligence and problem-solving. It involves breaking down a complex task into smaller, more manageable subtasks or steps. This process allows an agent or a system to better understand the task at hand and plan ahead effectively.\n\nOne common technique for task decomposition is the Chain of Thought (CoT), which prompts the model to think step by step, enabling it to decompose difficult tasks into simpler components. This approach helps in utilizing more computational resources during testing and sheds light on the model\'s thinking process.\n\nAnother extension of CoT is the Tree of Thoughts, which explores multiple reasoning possibilities at each step of the task decomposition. By generating multiple thoughts per step and creating a tree structure, this method enhances the agent\'s ability to tackle complex tasks.\n\nTask decomposition can be achieved through various methods, such as using simple prompts like "Steps

## HyDE 

In [66]:
# HyDE: have the LLM write a hypothetical passage that answers the question.
template = """Please write a scientific paper passage to answer the question. \n
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

generate_docs_for_retrieval = prompt_hyde | llm | StrOutputParser()

# Run the generator once to inspect the hypothetical passage.
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question": question})

'Task decomposition is a crucial concept in the field of reinforcement learning for Large Language Models (LLMs). LLM agents are complex models that are trained to perform a wide range of tasks, such as natural language understanding, generation, and translation. Task decomposition involves breaking down a complex task into smaller, more manageable sub-tasks that can be solved independently or in sequence.\n\nBy decomposing a task, LLM agents can effectively leverage their capabilities to solve complex problems more efficiently. This approach allows the agent to focus on solving smaller, more specific sub-tasks, which can lead to improved performance and faster learning. Additionally, task decomposition can help LLM agents generalize better to new tasks by learning reusable sub-tasks that can be applied across different domains.\n\nOverall, task decomposition is a powerful technique for enhancing the capabilities of LLM agents and enabling them to tackle a wide range of complex tasks i

In [69]:
# Retrieve: the generated hypothetical passage (not the question itself) is the retriever query.
# NOTE(review): this rebinds `retrieval_chain`, which earlier named the multi-query
# retrieval chain - re-running earlier cells after this one will pick up the HyDE chain.
retrieval_chain = generate_docs_for_retrieval | retriever

retrieved_docs = retrieval_chain.invoke({"question": question})

print(retrieved_docs)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner

In [70]:
# RAG: answer the original question from the documents retrieved via the hypothetical passage.
template = """ Answer the following question based on the context: {context}
Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain_hyde = prompt | llm | StrOutputParser()

hyde_inputs = {"question": question, "context": retrieved_docs}
final_rag_chain_hyde.invoke(hyde_inputs)

'Task decomposition for LLM agents can be done by using simple prompting like "Steps for XYZ", "What are the subgoals for achieving XYZ?", task-specific instructions, or with human inputs. Additionally, another approach involves relying on an external classical planner to do long-horizon planning, utilizing the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem.'