# 索引

In [1]:
# 加载文档
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# 划分
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50
)
splits = text_splitter.split_documents(blog_docs)
splits[:3]

# 索引
import os
from langchain_community.vectorstores import Chroma
from ark_embedding import ArkEmbeddings

embd = ArkEmbeddings(
    model=os.getenv("ALIYUN_EMBEDDING_MODEL"),
    api_key=os.getenv("ALIYUN_API_KEY"),
    api_url=os.getenv("ALIYUN_API_URL"),
    batch_size=10
)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embd
)
retriever = vectorstore.as_retriever()


# 提示词

## Part 5: Multi Query

In [3]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# 不同视角的multi query
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链
# generate_queries = (
#     prompt_perspectives
#     | llm
#     | StrOutputParser()
#     | (lambda x: x.split("\n"))
# )

# 构建处理链 - 保留原始查询
generate_queries = (
    RunnableParallel({
        "original": RunnablePassthrough(),
        "variations": (
            prompt_perspectives
            | llm
            | StrOutputParser()
            | (lambda x: x.split("\n"))
        )
    })
    | (lambda x: [x["original"]["question"]] + x["variations"])
)

question = "What is task decomposition for LLM agents?"
multi_queries = generate_queries.invoke({"question": question})

from pprint import pprint
print(len(multi_queries))
pprint(multi_queries)

6
['What is task decomposition for LLM agents?',
 'What are the different methods for breaking down complex tasks into '
 'sub-tasks for LLM agents?',
 'How do LLM agents use task decomposition to solve problems?',
 'Explain the concept and process of task decomposition in the context of '
 'large language model agents.',
 'What role does task decomposition play in the architecture and functionality '
 'of an LLM agent?',
 'Why is decomposing a complex goal into smaller steps important for AI agents '
 'based on large language models?']


In [4]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """
    获取唯一并集
    """
    # 使用dumps和loads将文档转换为字符串进行去重，完成后再转换回文档
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    unique_union = [loads(unique_doc) for unique_doc in unique_docs]
    return unique_union

# 检索
question = "What is task decomposition for LLM agents?"
retrieval_chain = (
    generate_queries 
    | retriever.map() 
    | get_unique_union
)
docs = retrieval_chain.invoke({"question": question})
len(docs)
pprint(docs[:3])

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content="Each element is an observation, an event directly provided by the agent.\n- Inter-agent communication can trigger new natural language statements.\n\n\nRetrieval model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance.\n\nRecency: recent events have higher scores\nImportance: distinguish mundane from core memories. Ask LM directly.\nRelevance: based on how related it is to the current situation / query.\n\n\nReflection mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. They are higher-level summaries of past events (<- note that this is a bit different from self-reflection above)\n\nPrompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions.\n\n\nPlanning & Reactin

  unique_union = [loads(unique_doc) for unique_doc in unique_docs]


In [None]:
# RAG
from operator import itemgetter
from langchain_openai import ChatOpenAI

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

final_rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question": question})
pprint(result)

('Task decomposition for LLM agents is the process of breaking down a large, '
 'complex task into smaller, more manageable subgoals or steps. This allows '
 'the agent to handle complicated tasks more efficiently by focusing on one '
 'smaller step at a time.\n'
 '\n'
 'As described in the context, this can be achieved in several ways:\n'
 '(1) By instructing the LLM with simple prompting (e.g., "Steps for '
 'XYZ.\\n1.", "What are the subgoals for achieving XYZ?"),\n'
 '(2) By using task-specific instructions (e.g., "Write a story outline." for '
 'writing a novel), or\n'
 '(3) With human inputs.\n'
 '\n'
 'This technique is inspired by and extends the Chain of Thought (CoT) '
 'prompting method, which instructs the model to "think step by step" to '
 'decompose hard tasks.')


# Part 6: RAG-Fusion(融合)
相比multi-queries增加了倒排的环节，但未取topk，实际效果应差不多。

## 提示词

In [10]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Provide these alternative questions separated by newlines. Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [11]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI


llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链 - 保留原始查询
generate_queries = (
    RunnableParallel({
        "original": RunnablePassthrough(),
        "variations": (
            prompt_rag_fusion
            | llm
            | StrOutputParser()
            | (lambda x: x.split("\n"))
        )
    })
    | (lambda x: [x["original"]["question"]] + x["variations"])
)

question = "What is task decomposition for LLM agents?"
multi_queries = generate_queries.invoke({"question": question})

from pprint import pprint
print(len(multi_queries))
pprint(multi_queries)

5
['What is task decomposition for LLM agents?',
 'What is task decomposition in the context of LLM agents?',
 'How do large language models break down complex tasks?',
 'Explain the process of task decomposition for AI agents.',
 'What are the methods and benefits of task decomposition for LLMs?']


In [13]:
# 倒排融合
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """
    倒排融合
    """
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # 计算得分
            fused_scores[doc_str] += 1 / (rank + k)
    # 排序
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)

docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)
pprint(docs[:3])


[(Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and ref

In [None]:
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question": question})

from pprint import pprint
pprint(result)

('Task decomposition for LLM agents is the process of breaking down a large, '
 'complex task into smaller, more manageable sub-tasks or steps.\n'
 '\n'
 'This is a core component of the "Planning" module in an LLM-powered '
 'autonomous agent system. The technique allows the agent to handle complex '
 'tasks more efficiently by focusing on one smaller step at a time. Common '
 'methods to achieve task decomposition include:\n'
 '\n'
 '*   **Chain of Thought (CoT):** Instructing the model to "think step by '
 'step" to decompose the task.\n'
 '*   **Tree of Thoughts (ToT):** Extending CoT by exploring multiple possible '
 'reasoning steps at each point, creating a tree of potential paths.\n'
 '*   **Simple Prompting:** Using prompts like "Steps for XYZ" or "What are '
 'the subgoals for achieving XYZ?"\n'
 '*   **Task-Specific Instructions:** Using instructions tailored to a '
 'specific goal, such as "Write a story outline."\n'
 '*   **Human Input:** Decomposing tasks based on direct 

# Part 7: 分解 Decomposition
将原问题分解为多个子问题

In [18]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [20]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# Chain
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})

from pprint import pprint
pprint(questions)

['1. What are the key architectural components of a large language model (LLM) '
 'autonomous agent?',
 '2. How does an autonomous agent system break down a task using an LLM?',
 '3. What tools and frameworks are commonly used to build LLM-powered '
 'autonomous agents?']


## 递归地进行回答

In [22]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [24]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """
    格式化问题和答案对
    """
    formatted_string = f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 递归地回答
q_a_pairs = ""
for q in questions + [question]:
    rag_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
            "q_a_pairs": itemgetter("q_a_pairs"),
        }
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pairs += "\n---\n" + format_qa_pair(q, answer)

from pprint import pprint
pprint(answer)

('Based on the provided context, the main components of an LLM-powered '
 'autonomous agent system are:\n'
 '\n'
 "1.  **LLM Core (The Agent's Brain):** The large language model functions as "
 'the core controller or "brain" of the entire agent system.\n'
 '2.  **Planning:** This component is responsible for:\n'
 '    *   **Subgoal and Decomposition:** Breaking down large, complex tasks '
 'into smaller, more manageable subgoals.\n'
 '    *   **Reflection and Refinement:** Enabling the agent to perform '
 'self-criticism and learn from past mistakes to refine future actions and '
 'improve results.\n'
 '3.  **Memory:** This component provides the agent with the capability to '
 'store and retrieve information. It consists of:\n'
 "    *   **Short-term memory:** This is considered the model's in-context "
 'learning, used for immediate tasks.\n'
 '    *   **Long-term memory:** This allows the agent to retain and recall '
 'information over extended periods, often by leveraging an exter

## 独立回答（可并行）
使用asyncio和ainvoke可以实现并行

In [26]:
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")
print("prompt:")
print(prompt.messages[0].prompt.template)
print("="*50)

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """子问题进行RAG"""

    # 问题分解
    sub_questions = sub_question_generator_chain.invoke({"question": question})

    # 子问题rag
    rag_results = []
    for sub_question in sub_questions:
        retrieved_docs = retriever.get_relevant_documents(sub_question)

        answer = (
            prompt_rag
            | llm
            | StrOutputParser()
        )
        answer.invoke({"context": retrieved_docs, "question": sub_question})
        rag_results.append(answer)
    
    return rag_results, sub_questions

answers, questions = retrieve_and_rag(
    question,
    prompt_rag,
    generate_queries_decomposition
)

prompt:
Answer the following question based on this context:

{context}

Question: {question}



  retrieved_docs = retriever.get_relevant_documents(sub_question)


In [27]:
# final answer
def format_qa_pairs(question, answer):
    """
    格式化问题和答案对
    """
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_answer = final_rag_chain.invoke({"question": question, "context": context})
pprint(final_answer)

('Based on the provided Q&A pairs, the main components of an LLM-powered '
 'autonomous agent system are:\n'
 '\n'
 '1.  **A Prompt Template:** This is the first component '
 '(`first=ChatPromptTemplate`), which structures the input for the LLM. It '
 "defines the system's role, provides instructions (e.g., to use retrieved "
 "context, be concise), and formats the user's question and any relevant "
 'context.\n'
 '2.  **The Large Language Model (LLM) Core:** This is the middle component '
 '(`middle=[ChatOpenAI]`), which is the reasoning engine of the agent. It '
 'processes the formatted prompt from the template to generate responses, make '
 'decisions, and perform computations. The configuration (e.g., `model_name`, '
 '`temperature`) is defined here.\n'
 '3.  **An Output Parser:** This is the last component '
 '(`last=StrOutputParser()`), which takes the raw output from the LLM and '
 'transforms it into a final, usable format for the user or the next step in a '
 'system.\n'
 '\n

总结：
感觉递归的回答效果要好一些

# Part 8: Step Back（回退）
之前的策略是使用更“具体”的问题进行检索，这里反其道而行，使用更“抽象”的问题进行检索，能对问题有一个“更高层次”的认识。