In [1]:
import os
from dotenv import load_dotenv

# Load the OpenAI API environment variables from the .env file.
env_loaded = load_dotenv()

# The web loader imported in the next cell logs "USER_AGENT environment
# variable not set" at import time when this variable is missing. Provide a
# default here (after load_dotenv, so a value from .env or the shell wins).
if "USER_AGENT" not in os.environ:
    os.environ["USER_AGENT"] = "multi-query-rag-notebook"

# Keep showing whether a .env file was found and loaded.
env_loaded

True

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
import os

USER_AGENT environment variable not set, consider setting it to identify your requests.


# 导入博客内容

In [3]:
# Source article used as the knowledge base for this notebook.
BLOG_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"

# Fetch the page and parse it into LangChain documents.
loader = WebBaseLoader(BLOG_URL)
data = loader.load()

# 文本分块

In [4]:
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 100  # maximum characters per chunk
CHUNK_OVERLAP = 0  # characters shared between consecutive chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(data)

# Report the chunk count and preview only the first few chunks: echoing the
# whole list floods the notebook output with every chunk and its metadata.
print(f"{len(splits)} chunks")
splits[:3]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

# 对分块内容进行嵌入

In [5]:
# Embedding model used to vectorise every chunk (configured from the environment).
embedding = OpenAIEmbeddings()

# Embed all chunks and index them in an in-memory FAISS vector store.
vectordb = FAISS.from_documents(
    documents=splits,
    embedding=embedding,
)

# 准备多查询检索器

In [6]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

# The user question that the retriever will expand into several variants.
question = "What are the approaches to Task Decomposition?"

# temperature=0 keeps the query rewriting as repeatable as the model allows.
llm = ChatOpenAI(temperature=0)

# Wrap the FAISS-backed retriever so the LLM first rewrites the question into
# alternative phrasings and each phrasing is used for retrieval.
base_retriever = vectordb.as_retriever()
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
)
retriever_from_llm

MultiQueryRetriever(retriever=VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x105ee2c10>, search_kwargs={}), llm_chain=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI language model assistant. Your task is \n    to generate 3 different versions of the given user \n    question to retrieve relevant documents from a vector  database. \n    By generating multiple perspectives on the user question, \n    your goal is to help the user overcome some of the limitations \n    of distance-based similarity search. Provide these alternative \n    questions separated by newlines. Original question: {question}')
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x11ab2d310>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x11b3333d0>, root_client=<openai.OpenAI object at 0x11b1d6090>, root_async_client=<opena

# 设置日志查看使用multi_query生成的query

In [7]:
import logging

# Install the default root handler so that log records are printed at all.
logging.basicConfig()

# Raise the multi-query retriever's logger to INFO so the generated query
# variants show up in the cell output.
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [8]:
# Run the multi-query retriever on the question. With INFO logging enabled for
# "langchain.retrievers.multi_query", the generated query variants are logged.
unique_docs = retriever_from_llm.invoke(question)
# Show how many documents were returned.
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies are commonly used for Task Decomposition?', '3. What are the various techniques for breaking down tasks in Task Decomposition?']


5

# 生成答案

In [9]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the answer-generation step. Built from adjacent string
# literals so that no stray backslashes or indentation whitespace end up in the
# text sent to the model. "{context}" is filled with the retrieved chunks.
qa_system_prompt = (
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know.\n\n"
    "{context}"
)
        
# Chat prompt: the system message carries the instructions and the retrieved
# context; the human message carries the user's question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}"),
    ]
)
def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line, or an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(document.page_content for document in docs)

# Input mapping for the prompt: the incoming question is sent to the
# multi-query retriever (whose documents are flattened into one context
# string) and is also passed through unchanged as "question".
retrieval_inputs = {
    "context": retriever_from_llm | format_docs,
    "question": RunnablePassthrough(),
}

# Full RAG pipeline: gather inputs -> fill the prompt -> call the LLM -> plain string.
rag_chain = retrieval_inputs | qa_prompt | llm | StrOutputParser()

rag_chain.invoke("What are the approaches to Task Decomposition?")

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies can be used for breaking down tasks in Task Decomposition?', '3. What are the various techniques available for approaching Task Decomposition?']


'The approaches to Task Decomposition mentioned in the provided context are:\n\n1. Task decomposition can be done by LLM with simple prompting like "Steps for XYZ.\\n1."\n2. Breaking down large tasks into smaller, manageable subgoals\n\nIf you need more detailed information or additional approaches, please let me know.'