# 索引

In [1]:
# 加载文档
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# 划分
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50
)
splits = text_splitter.split_documents(blog_docs)
splits[:3]

# 索引
import os
from langchain_community.vectorstores import Chroma
from ark_embedding import ArkEmbeddings

embd = ArkEmbeddings(
    model=os.getenv("ALIYUN_EMBEDDING_MODEL"),
    api_key=os.getenv("ALIYUN_API_KEY"),
    api_url=os.getenv("ALIYUN_API_URL"),
    batch_size=10
)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embd
)
retriever = vectorstore.as_retriever()


In [3]:
from langchain_core.runnables import RunnableLambda

def retrieve_with_scores(q):
    try:
        pairs = vectorstore.similarity_search_with_relevance_scores(q)  # 分数越大越相关[0,1]
    except Exception:
        print("Get relevance score failed. current score is distance score.")
        pairs = vectorstore.similarity_search_with_score(q)             # 距离分数，数值越小越相似
    docs = []
    for doc, score in pairs:
        doc.metadata["score"] = float(score)
        docs.append(doc)
    return docs

retriever_with_score = RunnableLambda(lambda x: retrieve_with_scores(x)).map()

In [4]:
def format_docs_func(docs):
    return "\n\n".join(doc.page_content for doc in docs)

format_docs = RunnableLambda(format_docs_func)

# 提示词

## Part 5: Multi Query

主要想法：构造多个相似问题，进行全面的检索。

In [5]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# 不同视角的multi query
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链
# generate_queries = (
#     prompt_perspectives
#     | llm
#     | StrOutputParser()
#     | (lambda x: x.split("\n"))
# )

# 构建处理链 - 保留原始查询
generate_queries = (
    RunnableParallel({
        "original": RunnablePassthrough(),
        "variations": (
            prompt_perspectives
            | llm
            | StrOutputParser()
            | (lambda x: x.split("\n"))
        )
    })
    | (lambda x: [x["original"]["question"]] + x["variations"])
)

question = "What is task decomposition for LLM agents?"
multi_queries = generate_queries.invoke({"question": question})

from pprint import pprint
print(len(multi_queries))
pprint(multi_queries)

6
['What is task decomposition for LLM agents?',
 'What are the different strategies for breaking down complex tasks for Large '
 'Language Model agents?',
 'How do LLM-based agents decompose a high-level goal into smaller, executable '
 'sub-tasks?',
 'Explain the process of task decomposition in the context of autonomous AI '
 'agents.',
 'What role does planning and step-by-step breakdown play in the functionality '
 'of LLM agents?',
 'Can you describe methods for hierarchical task decomposition used by '
 'advanced language model agents?']


In [6]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """
    获取唯一并集
    """
    # 使用dumps和loads将文档转换为字符串进行去重，完成后再转换回文档
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    unique_union = [loads(unique_doc) for unique_doc in unique_docs]
    return unique_union

# 检索
question = "What is task decomposition for LLM agents?"
retrieval_chain = (
    generate_queries 
    | retriever.map() 
    | get_unique_union
)
docs = retrieval_chain.invoke({"question": question})
len(docs)
pprint(docs[:3])

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Component One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a

  unique_union = [loads(unique_doc) for unique_doc in unique_docs]


In [7]:
# RAG
from operator import itemgetter
from langchain_openai import ChatOpenAI

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

final_rag_chain = (
    {
        "context": retrieval_chain | format_docs,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question": question})
pprint(result)

('Based on the provided context, task decomposition for LLM agents is the '
 'process where a large language model (LLM) breaks down a complicated, '
 'high-level task into smaller, simpler, and more manageable sub-tasks or '
 'steps.\n'
 '\n'
 'Key points from the context include:\n'
 '*   It is a core component of planning, allowing an agent to handle complex '
 'tasks efficiently.\n'
 '*   Techniques like Chain of Thought (CoT) instruct the model to "think step '
 'by step" to perform this decomposition.\n'
 '*   More advanced techniques like Tree of Thoughts (ToT) explore multiple '
 'reasoning possibilities for each step.\n'
 '*   Decomposition can be achieved through simple prompting (e.g., "Steps for '
 'XYZ."), using task-specific instructions, or with human input.')


# Part 6: RAG-Fusion(融合)
- Forget RAG, the Future is RAG-Fusion: https://medium.com/data-science/forget-rag-the-future-is-rag-fusion-1147298d8ad1  
- 作者的github实现仓库：https://github.com/Raudaschl/rag-fusion/blob/master/main.py

实现流程：
1. 通过 LLM 将用户的查询转换为相似但不同的查询。
2. 对原始查询及其新生成的查询执行向量搜索。
3. 使用倒数排序融合聚合和优化所有结果。
4. 引导LLM生成结果时，考虑了所有查询和重新排序的结果列表（关键点：在提示词中明确告知llm文档是ranked，也就是重要度由高到低）。

倒数排序融合（RRF，Reciprocal Rank Fusion）是一种结合多个搜索结果列表的排名以生成单一、统一排名的技术。RRF的计算公式如下：
$$
\mathrm{RRF}_{\text{score}}(d \in D) = \sum_{r \in R} \frac{1}{k + r(d)}
$$
其中$k=60$，r(d)表示对应文档d在其查询下排序（从1开始）。

![RRF 图](static/rrf.webp)

## 提示词

In [8]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Provide these alternative questions separated by newlines. Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [9]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI


llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链 - 保留原始查询
generate_queries = (
    RunnableParallel({
        "original": RunnablePassthrough(),
        "variations": (
            prompt_rag_fusion
            | llm
            | StrOutputParser()
            | (lambda x: x.split("\n"))
        )
    })
    | (lambda x: [x["original"]["question"]] + x["variations"])
)

question = "What is task decomposition for LLM agents?"
multi_queries = generate_queries.invoke({"question": question})

from pprint import pprint
print(len(multi_queries))
pprint(multi_queries)

5
['What is task decomposition for LLM agents?',
 'What is task decomposition in the context of LLM agents?',
 'How do large language models break down complex tasks?',
 'What are the methods and techniques for task decomposition with AI agents?',
 'Why is task decomposition important for autonomous LLM agents?']


In [10]:
# 倒排融合
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """
    倒排融合
    """
    fused_scores = {}

    for docs in results:
        # 由于原本返回的结果，就是按relvance score排序的，此步可以省略。
        # docs = sorted(docs, key=lambda x: x.metadata["score"], reverse=True)
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # 计算得分
            fused_scores[doc_str] += 1 / (rank + k)
    # 排序
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

retrieval_chain_rag_fusion = (
    generate_queries
    | retriever_with_score
    | reciprocal_rank_fusion
)

docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)
pprint(docs[:3])


[(Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'score': 0.5758502166824937}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions

In [19]:
from langchain_core.runnables import RunnableLambda

# RAG
template = """Output based on questions and reranked documents,
questions:
{questions}

reranked documents:
{context}
"""

prompt = ChatPromptTemplate.from_template(template)

def format_rank_fusion_func(docs_with_score):
    for rank, (d, s) in enumerate(docs_with_score, 1):
        d.page_content = f'doc {rank}, similarity score={s:.2f}, content:\n"{d.page_content}"'
    return "\n\n".join(d.page_content for d, _ in docs_with_score)

format_rank_fusion = RunnableLambda(format_rank_fusion_func)

final_rag_chain = (
    generate_queries
    | {
        "context": retriever_with_score | reciprocal_rank_fusion | format_rank_fusion,
        "questions": RunnableLambda(lambda x: "\n".join(x))
    }
    | prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question": question})

from pprint import pprint
pprint(result)

('Based on the provided documents, here is a comprehensive answer to your '
 'questions about task decomposition for LLM agents.\n'
 '\n'
 '### What is Task Decomposition for LLM Agents?\n'
 '\n'
 'In the context of LLM-powered autonomous agents, **task decomposition** '
 '(also referred to as **subgoal decomposition**) is a fundamental planning '
 'technique where a large, complex task is broken down into a series of '
 'smaller, more manageable subgoals or steps. This allows the agent to tackle '
 'problems that are too complicated to solve in a single step efficiently.\n'
 '\n'
 'The LLM acts as the agent\'s "brain" and uses this capability to enable the '
 'efficient handling of complex tasks by focusing on one achievable step at a '
 'time.\n'
 '\n'
 '### How do LLMs Break Down Complex Tasks? (Methods)\n'
 '\n'
 'According to the documents, task decomposition can be achieved through '
 'several methods:\n'
 '\n'
 '1.  **Chain of Thought (CoT):** This is a standard prompting techni

总结：重新调整了rag-fusion的实现，效果好多了。

# Part 7: 分解 Decomposition
将原问题分解为多个子问题

Paper:
- https://arxiv.org/pdf/2205.10625.pdf
- https://arxiv.org/abs/2212.10509.pdf

In [29]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Provide these alternative questions separated by newlines. Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [30]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# Chain
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})

from pprint import pprint
pprint(questions)

['What are the key architectural components of a Large Language Model (LLM) '
 'agent system?',
 'How does an autonomous agent system using an LLM for reasoning typically '
 'work?',
 'What are the main modules or parts that make up an LLM-powered autonomous '
 'agent?']


## 递归地进行回答

In [31]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [32]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """
    格式化问题和答案对
    """
    formatted_string = f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 递归地回答
q_a_pairs = ""
for q in questions + [question]:
    rag_chain = (
        {
            "context": itemgetter("question") | retriever | format_docs,
            "question": itemgetter("question"),
            "q_a_pairs": itemgetter("q_a_pairs"),
        }
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pairs += "\n---\n" + format_qa_pair(q, answer)

from pprint import pprint
pprint(answer)

('Based on the provided context, the main components of an LLM-powered '
 'autonomous agent system are:\n'
 '\n'
 '1.  **LLM Core (The "Brain")**: The Large Language Model functions as the '
 "agent's central controller and reasoning engine, processing information and "
 'making decisions.\n'
 '\n'
 '2.  **Planning**: This component is responsible for:\n'
 '    *   **Subgoal and Decomposition**: Breaking down large, complex tasks '
 'into smaller, manageable subgoals.\n'
 '    *   **Reflection and Refinement**: Engaging in self-criticism and '
 'self-reflection on past actions to learn from mistakes and improve future '
 'steps and the quality of final results.\n'
 '\n'
 '3.  **Memory**: This component provides the agent with the capability to '
 'retain and recall information, and consists of:\n'
 "    *   **Short-term Memory**: Utilizes the LLM's in-context learning within "
 'its finite context window for immediate processing.\n'
 '    *   **Long-term Memory**: Leverages an external

## 独立回答（可并行）
使用asyncio和ainvoke可以实现并行

In [44]:
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")
print("prompt:")
print(prompt.messages[0].prompt.template)
print("="*50)

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """子问题进行RAG"""

    # 问题分解
    sub_questions = sub_question_generator_chain.invoke({"question": question})

    # 子问题rag
    rag_results = []
    for sub_question in sub_questions:
        retrieved_docs = retriever.invoke(sub_question)

        answer = (
            prompt_rag
            | llm
            | StrOutputParser()
        )
        answer = answer.invoke({"context": format_docs_func(retrieved_docs), "question": sub_question})
        rag_results.append(answer)
    
    return rag_results, sub_questions

answers, questions = retrieve_and_rag(
    question,
    prompt_rag,
    generate_queries_decomposition
)

prompt:
Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}



In [46]:
# final answer
def format_qa_pairs(question, answer):
    """
    格式化问题和答案对
    """
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_answer = final_rag_chain.invoke({"question": question, "context": context})
pprint(final_answer)

('Based on the provided Q&A pairs, the main components of an LLM-powered '
 'autonomous agent system are:\n'
 '\n'
 '1.  **The LLM Core (Controller/Reasoning Engine):** This is the central '
 '"brain" of the agent. The LLM is responsible for high-level reasoning, '
 "decision-making, and controlling the agent's overall actions.\n"
 '\n'
 '2.  **Planning Module:** This component handles task decomposition and '
 'self-reflection. It breaks down large, complex tasks into manageable '
 "subgoals and refines the agent's approach based on outcomes.\n"
 '\n'
 '3.  **Memory Module:** This includes both short-term and long-term memory, '
 'allowing the agent to retain and recall information across interactions to '
 'maintain context and learn from past experiences.\n'
 '\n'
 '4.  **Tool Use Module:** This enables the agent to interact with external '
 'data sources and environments by calling external APIs and functions, '
 'greatly expanding its capabilities beyond its built-in knowledge.\n'

总结：
感觉递归的回答效果要好一些

# Part 8: Step Back（回退）
之前的策略是使用更“具体”的问题进行检索，这里反其道而行，使用更“抽象”的问题进行检索，能对问题有一个“更高层次”的认识。  
Paper  
- https://arxiv.org/pdf/2310.06117.pdf

In [47]:
# 通过few-shots的方式引导LLM提出一个更通用（抽象层次更高）的问题
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}")
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
system_prompt = (
    "You are an expert at world knowledge. "
    "Your task is to step back and paraphrase a question "
    "to a more generic step-back question, "
    "which is easier to answer. Here are a few examples:"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        few_shot_prompt,
        ("user", "{question}")
    ]
)

In [48]:
import os
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链
generate_queries_step_back = (
    prompt
    | llm
    | StrOutputParser()
)
# 调用
question = "What is task decomposition for LLM agents?"
step_back_query = generate_queries_step_back.invoke({"question": question})

from pprint import pprint
pprint(step_back_query)

'What is task decomposition in the context of artificial intelligence?'


In [52]:
from langchain_core.runnables import RunnableLambda

# Response prompt 
# 按如下的结构构建prompt:
# {normal_context}
# {step_back_context}
# Original Question: {question}
# Answer:
response_prompt_template = (
    "You are an expert of world knowledge. "
    "I am going to ask you a original question. Your response "
    "should be comprehensive and not contradicted "
    "with the following context if they are relevant. "
    "Otherwise, ignore them if they are not relevant.\n\n"
    "# {normal_context}\n"
    "# {step_back_context}\n\n"
    "# Original Question: {question}\n"
    "# Answer:"
)
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

# 构建处理链
chain = (
    {
        # 通过原始问题搜索相关信息
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever | format_docs,
        # 通过step back问题搜索相关信息
        "step_back_context": generate_queries_step_back | retriever | format_docs,
        # 原始问题
        "question": lambda x: x["question"],
    }
    | response_prompt
    | llm
    | StrOutputParser()
)
final_answer = chain.invoke({"question": question})

from pprint import pprint
pprint(final_answer)

('Based on the provided context, **task decomposition for LLM agents** is a '
 'core component of the planning module. It is the process by which a Large '
 'Language Model (LLM) breaks down a large, complex task into smaller, more '
 'manageable subgoals or steps. This enables the autonomous agent to handle '
 'tasks that are too complicated to be solved in a single step.\n'
 '\n'
 'Here are the key details about task decomposition as described in the '
 'context:\n'
 '\n'
 '1.  **Purpose:** The primary purpose is to enable "efficient handling of '
 'complex tasks." By decomposing a task, the agent can tackle it in a '
 'structured, step-by-step manner.\n'
 '\n'
 '2.  **Connection to Known Techniques:** The concept is directly linked to '
 'the "Chain of Thought" (CoT) prompting technique. The context states that '
 '"CoT transforms big tasks into multiple manageable tasks," making it a '
 'fundamental method for achieving decomposition. It is also extended by more '
 'advanced techni

总结：step-back是目前感觉效果最好的方法，且效率也比较高。回答的主要特点就是“全面”，这也是step-back的主要收益，但同时可能也会显得有些“啰嗦”。

# Part 9: HyDE

Hypothetical Document Embeddings (HyDE)  
主要的思想是，问题和答案可能不在一个嵌入空间，通过生成“假设”文档，让问题和待检索的doc处于同一空间。  
具体做法：
1. 给定一个查询，HyDE 首先让LLM根据问题生成一个与问题相关的“假设”文档。
2. 然后，一个无监督对比学习编码器（例如 Contriever）将该文档编码成一个嵌入向量。
3. 根据生成的“假设”文档检索相似的真实文档。  
Paper:  
- https://arxiv.org/abs/2212.10496

In [53]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from pprint import pprint

### 生成假设文档
# 构造提示词
template = (
    "Please write a scientific paper passage to answer the question\n"
    "Question: {question}\n"
    "Passage:"
)
prompt_hyde = ChatPromptTemplate.from_template(template)

# LLM
llm = ChatOpenAI(
    model=os.getenv("ARK_MODEL"),
    api_key=os.getenv("ARK_API_KEY"),
    base_url=os.getenv("ARK_API_URL"),
    temperature=0.0,
)

# 构建处理链
generate_docs_for_retrieval = (
    prompt_hyde
    | llm
    | StrOutputParser()
)

# 获取假设文档
question = "What is task decomposition for LLM agents?"
hyde_doc = generate_docs_for_retrieval.invoke({"question": question})
pprint(hyde_doc)

('Of course. Here is a scientific paper passage answering the question: "What '
 'is task decomposition for LLM agents?"\n'
 '\n'
 '---\n'
 '\n'
 '### **Task Decomposition in Large Language Model Agents**\n'
 '\n'
 '**Abstract:** The efficacy of Large Language Model (LLM) agents in complex, '
 'multi-step reasoning and problem-solving is fundamentally constrained by '
 'their inherent limitations in planning, context window size, and '
 'susceptibility to error propagation. To mitigate these constraints, **task '
 'decomposition** has emerged as a critical cognitive architectural paradigm. '
 'This paper delineates the conceptual framework of task decomposition, its '
 'operational methodologies, and its significance in enabling robust agentic '
 'behavior.\n'
 '\n'
 '**1. Introduction**\n'
 'An LLM agent is a system that utilizes a large language model as its core '
 'computational engine to perceive its environment, reason over goals, and '
 'execute actions autonomously. While power

In [56]:
# 使用hyde_doc进行检索
retrieval_chain = (
    generate_docs_for_retrieval
    | retriever
)
retrieved_docs = retrieval_chain.invoke({"question": question})
pprint(retrieved_docs)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

In [57]:
### final answer
template = (
    "Answer the following question based on this context:\n\n"
    "{context}\n\n"
    "Question: {question}"
)
prompt = ChatPromptTemplate.from_template(template)
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)
final_answer = final_rag_chain.invoke({
    "context": format_docs_func(retrieved_docs),
    "question": question
})
pprint(final_answer)

('Based on the provided context, **task decomposition for LLM agents** is the '
 'process of breaking down a large, complex task into smaller, more manageable '
 'subgoals or steps. This is a core component of the "Planning" module in an '
 'LLM-powered autonomous agent system.\n'
 '\n'
 'The text provides the following key details about task decomposition:\n'
 '\n'
 '*   **Purpose:** It enables the agent to handle complex tasks efficiently.\n'
 '*   **Methods:** It can be achieved in several ways:\n'
 '    1.  **Using LLM with simple prompting**, such as "Steps for XYZ.\\n1." '
 'or "What are the subgoals for achieving XYZ?".\n'
 '    2.  **By using task-specific instructions**, for example, instructing '
 'the agent to "Write a story outline." as a first step to writing a novel.\n'
 '    3.  **With human inputs**.\n'
 '*   **Related Techniques:** The concept is linked to standard prompting '
 'techniques like **Chain of Thought (CoT)**, which instructs the model to '
 '"think step by

总结：Hyde感觉效果也还可以，但没有特别好。