### 使用LangChain构建应用程序

#### PDF问答程序

In [None]:
# 安装tiktoken,PyPDF2,Faiss-GPU
!pip -q install tiktoken PyPDF2 faiss-gpu

In [None]:
# 导入必要的库
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
# 调用PdfReader类加载pdf文件
doc_reader = PdfReader("./impromptu-rh.pdf")

In [None]:
# 使用PdfReader类的extract_text()函数提取PDF中的文字
raw_text = ""
for i,page in enumerate(doc_reader.pages):
    text = page.extract_text()
    if text:
        raw_text += text

In [None]:
# 使用LangChain内置的ChartacterTextSplitter类对文本进行分割
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function = len
)

# 使用split_text()函数对文本进行分割
texts = text_splitter.split_text(raw_text)

In [None]:
# 检查块数
len(texts)

In [None]:
# 随机选择一个块进行检查
import random 

texts[random.randint(0, len(texts))]

In [None]:
# 加载OpenAI嵌入
embeddings = OpenAIEmbeddings()

In [None]:
# 创建PDF的向量存储库
docsearch = FAISS.from_texts(texts,embeddings)

In [None]:
# 执行搜索
query = "GPT-4 如何改变了社交媒体？"
docs = docsearch.similarity_search(query)

In [None]:
len(docs)

In [None]:
docs[0]

#### 创建问答链

In [None]:
# 加载LLM和QA链
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
chain = load_qa_chain(
    OpenAI(),
    chain_type = "stuff"
)

In [None]:
# 执行QA链
query = "这本书的作者是谁？"
docs = docsearch.similarity_search(query)

chain.run(input_documents=docs, question=query)

In [None]:
# 重新设置topk来获得更多的答案
docs = docsearch.similarity_search(query, k=6)
chain.run(input_documents=docs, question=query)

In [None]:
# 重新构建一个重排链
from langchain.chains.question_answering import load_qa_chain

chain = load_qa_chain(
    OpenAI(),
    chain_type = "map_rerank",
    return_intermediate_steps = True
)

In [None]:
# 执行QA链
query = "OpenAI的创始人是谁？"
docs = docsearch.similarity_search(query,k=10)
results = chain(
    {"input_documents":docs, "question":query},
    return_only_outputs = True
)

In [None]:
results

In [None]:
# 打印最终答案
results["output_text"]

In [None]:
# 检查LLM链的提示词模板
chain.llm_chain.prompt.template

In [None]:
# 构建一个检索QA链
docsearch = FAISS.from_texts(texts, embeddings)

from langchain.chains import RetrievalQA

retriever = docsearch.as_retriever(search_type="similarity",search_kwargs={"k":4})

In [None]:
# 创建QA链
rqa = RetrievalQA.from_chain_type(
    llm = OpenAI(),
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True
)

In [None]:
# 执行查询
query = "OpenAI 是什么？"
rqa(query)['result']

In [None]:
query = "GPT-4 对创新力有什么影响？"
rqa(query)['result']

#### 对话式表单

#### OpenAI函数的标记链

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from pydantic import BaseModel,Field
from enum import Enum
from langchain.chains.openai_functions import (
    create_tagging_chain,
    create_tagging_chain_pydantic
)

In [None]:
# 定义个人数据模型
class PersonalDetails(BaseModel):
    # 定义数据类型
    name:str = Field(
        ...,
        description="这是用户输入的名字"
    )
    city:str = Field(
        ...,
        description="这是用户输入的城市"
    )
    email:str = Field(
        ...,
        description="这是用户输入的邮箱地址"
    )

In [None]:
# 创建聊天LLM
llm = ChatOpenAI(temperature=0)

In [None]:
# 创建标记链
chain = create_tagging_chain_pydantic(PersonalDetails,llm)

In [None]:
# 运行链
test_input = "你好，我叫大霖，我住在辽宁沈阳，我的邮箱是：876251324@xxx.com"
test_result = chain.run(test_input)

In [None]:
test_result

In [None]:
# 运行一个信息并不全面的输入
test_str = "我的电子邮箱地址是：876251324@xxx.com"
test_result = chain.run(test_str)

test_result

### 创建提示词模板

In [None]:
def ask_for_info(ask_for=["name","city","email"]):
    # 定义一个提示词模板
    first_prompt = ChatPromptTemplate.from_template(
        """
        假设你现在是一名前台，你现在需要对用户进行询问他个人的具体信息。
        不要跟用户打招呼！你可以解释你需要什么信息。不要说“你好！”！
        接下来你和用户之间的对话都是你来提问，凡是你说的都是问句。
        你每次随机选择{ask_for}列表中的一个项目，向用户提问。
        比如{"name","city"}列表，你可以随机选择一个"name"，
        你的问题是：“请问你的名字是什么？”
        """
    )

    info_gathering_chain = LLMChain(llm=llm,prompt=first_prompt)
    chat_chain = info_gathering_chain.run(ask_for=ask_for)

    return chat_chain

### 数据更新和检查

In [None]:
# 定义检查字段为空的函数
def check_what_is_empty(user_personal_details):
    ask_for = []
    # 检查项目是否为空
    for field,value in user_personal_details.dict().items():
        if value in [None,"",0]:
            print(f"{field}为空")
            ask_for.append(field)
    return ask_for

In [None]:
user_007_personal_details = PersonalDetails(name="",city="",email="")

In [None]:
ask_for = check_what_is_empty(user_007_personal_details)
ask_for

In [None]:
# 定义add_non_empty_details函数负责更新用户的信息
def add_non_empty_details(current_details:PersonalDetails,new_details:PersonalDetails):
    # 这是已经填好的用户信息
    non_empty_details = {k:v for k,v in new_details.dict().items() if v not in [None,"",0]}
    update_details = current_details.copy(update=non_empty_details)
    return update_details

In [None]:
res = chain.run("我的名字007")
user_007_personal_details = add_non_empty_details(user_007_personal_details,res)
user_007_personal_details

In [None]:
ask_for = check_what_is_empty(user_007_personal_details)
ask_for

### 构建BabyAGI

In [1]:
# 导入工具
import os
from collections import deque
from typing import Dict,List,Optional,Any
from langchain import LLMChain,OpenAI,PromptTemplate
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import BaseLLM
from langchain.vectorstores.base import VectorStore
from pydantic import BaseModel,Field
from langchain.chains.base import Chain

In [2]:
# 导入FAISS
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

In [3]:
# 创建嵌入模型
embeddings = OpenAIEmbeddings()

  warn_deprecated(


In [4]:
# 初始化向量数据库
import faiss
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)

In [5]:
# 将生成的向量和构建的索引进行存储
vectorstore = FAISS(
    embeddings.embed_query,
    index,
    InMemoryDocstore({}),
    {}
)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [6]:
# 定义创建任务链
class TaskCreationChain(LLMChain):

    @classmethod
    def from_llm(cls,llm:BaseLLM,verbose:bool=True) -> LLMChain:
        # 从解析器获得响应
        task_creation_template = (
            "You are an task creation AI that uses the result of an execution agent"
            " to create new tasks with the following objective:{objective},"
            " The last completed task has the result:{result}."
            " This result was based on this task description:{task_description}."
            " These are incomplete tasks: {incomplete_tasks}."
            " Based on the result,create new tasks to be completed"
            " by the AI system that do not overlap with incomplete tasks."
            " Return the tasks as an array."
        )
        prompt = PromptTemplate(
            template = task_creation_template,
            input_variables = ["result","task_description","incomplete_tasks","objective"],
        )
        return cls(llm=llm,prompt=prompt,verbose=verbose)

In [7]:
# 创建任务优先级判断链
class TaskPrioritizationChain(LLMChain):
    
    @classmethod
    def from_llm(cls,llm:BaseLLM,verbose:bool=True) -> LLMChain:
        # 从解析器获得响应
        task_prioritization_template = (
            "You are an task prioritization AI tasked with cleaning the fromatting of and reprioritizing"
            " the following tasks: {task_names}."
            " Consider the ultimate objective of your team : {objective}."
            " Do not remove any tasks.Return the result as a numbered list,like:"
            " #. First task"
            " #. Second task"
            " Start the task list with number {next_task_id}."
        )
        prompt = PromptTemplate(
            template = task_prioritization_template,
            input_variables = ["task_names","objective","next_task_id"],
        )

        return cls(llm=llm,prompt=prompt,verbose=verbose)

In [8]:
# 执行链
from langchain.agents import ZeroShotAgent,Tool,AgentExecutor
from langchain import OpenAI,SerpAPIWrapper,LLMChain

todo_prompt = PromptTemplate.from_template(
    "You are planner who is an expert at coming up with a todo list for a given objective.Come up with a todo list for this objective: {objective}")

todo_chain = LLMChain(llm=OpenAI(),prompt=todo_prompt)

search = SerpAPIWrapper()

tools = [
    Tool(
        name = "Search",
        func = search.run,
        description = "useful for when you need to answer questions about current events"
    ),
    Tool(
        name = "TODO",
        func = todo_chain.run,
        description = "useful for when you need to come up with todo lists.Input:an objective to create a todo list for.Output:a todo list for that objective.Please be very clear what the objective is!"
    )
]

prefix = """You are an AI who performs one task based on the following 
objective: {objective}.Take into account these previously these previously completed
tasks: {context}.
"""

suffix = """
Question: {task}
{agent_scratchpad}
"""

prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix = prefix,
    suffix = suffix,
    input_variables = ["objective","context","task","agent_scratchpad"],
)

  warn_deprecated(


In [9]:
# 整合所有链
def get_next_task(
        task_createion_chain:LLMChain,
        result:Dict,
        task_description:str,
        task_list:List[str],
        objective:str
    ) -> List[Dict]:
    # 获取下个任务
    incomplete_tasks = ", ".join(task_list)
    response = task_createion_chain.run(
        result = result,
        task_description = task_description,
        incomplete_tasks = incomplete_tasks,
        objective = objective
    )
    new_tasks = response.split("\n")
    return [{"task_name":task_name} for task_name in new_tasks if task_name.strip()]

# 定义任务优先级函数
def prioritize_tasks(task_prioritization_chain:LLMChain,this_task_id:int,task_list:List[str],objective:str) -> List[str]:
    task_names = [t["task_name"] for t in task_list]
    next_task_id = int(this_task_id) + 1
    response = task_prioritization_chain.run(
        task_names = task_names,
        next_task_id = next_task_id,
        objective = objective
    )
    new_tasks = response.split("\n")
    prioritized_tasks_list = []
    for task_string in new_tasks:
        if not task_string.strip():
            continue
        task_parts = task_string.strip().split(".",1)
        if len(task_parts) == 2:
            task_id = task_parts[0].strip()
            task_name = task_parts[1].strip()
            prioritized_tasks_list.append(
                {
                    "task_id":task_id,
                    "task_name":task_name
                }
            )
    return prioritized_tasks_list

# 获取获取最优先的K个任务函数
def _get_top_tasks(vectorstore,query:str,k:int) -> List[str]:
    results = vectorstore.similarity_search_with_score(query,k=k)
    if not results:
        return []
    
    sorted_results,_ = zip(*sorted(results,key=lambda x:x[1],reverse=True))

    return [str(item.metadata['task']) for item in sorted_results]

# 定义执行任务函数
def execute_task(vectorstore,execution_chain:LLMChain,objective:str,task:str,k:int = 5) -> str:
    context = _get_top_tasks(vectorstore,query=objective,k=k)
    return execution_chain.run(
        objective = objective,
        context = context,
        task = task
    )

In [10]:
# 创建BabyAGI类
class BabyAGI(Chain,BaseModel):
    # 用于BabyAGI智能体的控制模型
    
    task_list:deque = Field(default_factory=deque)
    task_creation_chain:TaskCreationChain = Field(...)
    task_prioritization_chain:TaskPrioritizationChain = Field(...)
    execution_chain:AgentExecutor = Field(...)
    task_id_counter:int = Field(1)
    vectorstore:VectorStore = Field(init=False)
    max_iterations: Optional[int] = None

    class config:
        # 用于配置BabyAGI智能体的参数

        aribtray_types_allowed = True

    def add_task(self,task:Dict):
        self.task_list.append(task)

    def print_task_list(self):
        print("\033[95m\033[1m" + "\n*TASK LIST*\n" + "\033[0m\033[0m")
        for task in self.task_list:
            print(str(task["task_id"]) + ": " + task["task_name"])
        
    def print_next_task(self,task:Dict):
        print("\033[92m\033[1m" + "\n*NEXT TASK*\n" + "\033[0m\033[0m")
        print(str(task["task_id"]) + ": " + task["task_name"])

    def print_task_result(self,result:str):
        print("\033[93m\033[1m" + "\n*RESULT*\n" + "\033[0m\033[0m")
        print(result)

    @property
    def input_keys(self) -> List[str]:
        return ["objective"]
    
    @property
    def output_keys(self) -> List[str]:
        return []

    def _call(self,inputs:Dict[str,Any]) -> Dict[str,Any]:
        # 执行BabyAGI智能体
        objective = inputs["objective"]
        first_task = inputs.get("first_task","Make a todo list")
        self.add_task({"task_id":1,"task_name":first_task})
        num_iters = 0
        while True:
            if self.task_list:
                self.print_task_list()
                # 第一步：拉取第一个任务
                task = self.task_list.popleft()
                
                # 第二步：执行任务
                result = execute_task(
                    self.vectorstore,
                    self.execution_chain,
                    objective,
                    task["task_name"]
                )
                this_task_id = int(task["task_id"])
                self.print_task_result(result)

                # 存储结果
                result_id = f"result_{this_task_id}"
                self.vectorstore.add_texts(
                    texts = [result],
                    metadata = [{"task":task["task_name"]}],
                    ids = [result_id]
                )

                # 第四步：执行新任务并重新调整任务列表的优先级
                new_tasks = get_next_task(
                    self.task_creation_chain,
                    result = result,
                    task_description = task["task_name"],
                    task_list = [t["task_name"] for t in self.task_list],
                    objective = objective
                )

                for new_task in new_tasks:
                    self.task_id_counter += 1
                    new_task.update({"task_id":self.task_id_counter})
                    self.add_task(new_task)
                    self.task_list = deque(
                        prioritize_tasks(
                            self.task_prioritization_chain,
                            this_task_id,
                            list(self.task_list),
                            objective
                        )
                    )
                num_iters += 1
                if self.max_iterations is not None and num_iters == self.max_iterations:
                    print(f"\033[91m\033[1m" + "\n*TASK ENDING*\n" + "\033[0m\033[0m")
                    break
        return {}

    @classmethod
    def from_llm(
        cls,
        llm: BaseLLM,
        vectorstore: VectorStore,
        verbose: bool = False,
        **kwargs
    ) -> "BabyAGI":
        # 初始化BabyAGI控制器
        task_creation_chain = TaskCreationChain.from_llm(llm,verbose=verbose)
        task_prioritization_chain = TaskPrioritizationChain.from_llm(llm,verbose=verbose)
        llm_chain = LLMChain(llm=llm,prompt=prompt)
        tool_names = [tool.name for tool in tools]
        agent = ZeroShotAgent(llm_chain=llm_chain,allowed_tools=tool_names)
        agent_executor = AgentExecutor.from_agent_and_tools(
            agent = agent,
            tools = tools,
            verbose = True
        )
        return cls(
            task_creation_chain = task_creation_chain,
            task_prioritization_chain = task_prioritization_chain,
            execution_chain = agent_executor,
            vectorstore = vectorstore,
            **kwargs
        )

In [11]:
# 初始化LLM
llm = OpenAI(temperature=0)

In [12]:
# 初始化OBJECTIVE
OBJECTIVE = "Find the cheapest price and site to buy a Yubikey 5c online and give me the URL"

In [13]:
# 初始化LLMChains
verbose = False
# 如果为空，则将继续执行
max_iterations:Optional[int] = 7
# 实例化BabyAGI
baby_agi = BabyAGI.from_llm(
    llm = llm,
    vectorstore = vectorstore,
    verbose = verbose,
    max_iterations = max_iterations
)

  warn_deprecated(


In [14]:
baby_agi({"objective":OBJECTIVE})

  warn_deprecated(


[95m[1m
*TASK LIST*
[0m[0m
1: Make a todo list


  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I should think about what tasks need to be included in the todo list
Action: TODO
Action Input: Objective to create a todo list[0m
Observation: [33;1m[1;3m

1. Define the purpose and scope of the todo list: Decide what tasks and goals the list should cover and how detailed it needs to be.

2. Brainstorm and list all tasks: Start by brainstorming all the tasks that need to be done in order to create the todo list. This could include research, organizing, and prioritizing.

3. Categorize tasks: Group similar tasks together to make them easier to manage. This could include categories such as research, planning, and implementation.

4. Prioritize tasks: Determine which tasks are the most important and should be done first. This will help to ensure that the most crucial items are completed first.

5. Set deadlines: Assign a deadline to each task to ensure that the todo list is completed in a timely manner. Be realistic

KeyError: 'task'