In [1]:
import os
from typing import Any, Dict, List
import sys

from langchain.agents import load_tools, Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import Tongyi
from langchain.utilities import BingSearchAPIWrapper
from langchain_huggingface.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline, TextStreamer
from langchain_openai import ChatOpenAI
from langchain_core.outputs import LLMResult
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.messages import BaseMessage
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory holding the llama-3-chinese-8b-instruct-v3 weights.
# It can be overridden with the CHAT_MODEL_PATH environment variable so the
# notebook is not tied to one machine's mount point; the original location
# remains the default.
CHAT_MODEL_PATH = os.environ.get(
    "CHAT_MODEL_PATH",
    "/mnt/nfs/zsd_server/models/huggingface/llama-3-chinese-8b-instruct-v3/",
)

In [3]:
# Bing Web Search settings, set before the BingSearchAPIWrapper is created.
# setdefault() is used so that a real key/endpoint already exported in the
# environment is not clobbered by the blank placeholder committed here; the
# values below only apply when the variables are unset.
os.environ.setdefault("BING_SUBSCRIPTION_KEY", "")
os.environ.setdefault("BING_SEARCH_URL", "https://api.bing.microsoft.com/v7.0/search")

In [4]:
# Create the Bing search wrapper used throughout the notebook.
search = BingSearchAPIWrapper()

# Smoke test: query "How large is the area of Beijing?" and display the raw
# result string as the cell output.
res = search.run(query="北京的面积有多大？")
res

'<b>北京</b>市地处华北平原西北隅，地势西北高、东南低。地形<b>的</b>特点是山地<b>多</b>、平原少，北部为中低山区，东南部是缓斜<b>的</b>平原，其中山区<b>面积</b>约占61%，平原<b>面积</b>约占39%。全市平均海拔43.5米。<b>北京</b>市平原<b>的</b>海拔高度在20～60米，山地一般海拔1000～1500米。 北京位于北纬39度56<b>分</b>、东经116度20<b>分</b>，地处华北大平原的北部，东面与天津市毗连，其余均与河北省相邻。 北京，简称“京”，是中华人民共和国的首都，是全国的政治中心、文化中心，是世界著名古都和现代化国际城市。 北京市平原区面积为<b>6200平方千米</b>，<b>约</b>占总面积的38%，山区面积10200平方千米，约占总面积的62%，北京的地形西北高，东南低。 北京市平均海拔43.5米。 北京平原的海拔高度在20～60米，山地一般海拔1000～1500米。 由此可见，重庆<b>的</b>平地<b>面积</b>所占比重较小，而<b>北京</b>、天津、上海<b>的</b>平原<b>面积</b>占总<b>面积的</b>比重分别约是38％、93%、94%，即3200平方千米、11129平方千米、5960.07平方千米。 <b>北京</b>，简称“京”，是中华人民共和国<b>的</b>首都、直辖市、 国家中心城市 、超<b>大</b>城市、国际<b>大</b>都市，全国政治中心、文化中心、国际交往中心、科技创新中心，是 中国共产党中央委员会 、中华人民共和国中央人民政府、 全国人民代表大会 、中国人民政治协商 ... <b>北京</b>（Beijing），简称“京”，古称燕京、北平，是中华人民共和国<b>的</b>首都、直辖市、国家中心城市、超<b>大</b>城市，国务院批复确定<b>的</b>中国政治中心、文化中心、国际交往中心、科技创新中心 [1]，截至2020年，全市下辖16个区，总<b>面积</b>16410.54平方千米。. 抢首赞. 评论 ... 对于“北京有多大？. ”这类问题，当然可以理解为在问“北京的区划范围多大？. ”。. 这很容易查得到：面积<b>1.64万平方公里</b>，常住人口2189万。. 但是，这个问题还有一种更常见并更符合实际的理解：“北京连绵的建

In [5]:
# Second smoke test: query "How large is the area of New York?" and display
# the raw result string.
res = search.run(query="纽约的面积有多大？")
res

'<b>纽约</b>（英语：New York），隶属于美国 <b>纽约</b>州 ，美国第一<b>大</b>城市，位于美国东北部沿海 哈德逊河 口，濒临 大西洋 ，属 温带大陆性湿润气候 。. 总<b>面积</b>1214平方千米，下辖5个区，市政厅驻曼哈顿市政厅公园内。. [22]截至2022年，总人口约834万人。. [43] <b>纽约</b>原为 ... 纽约市整体面积约<b>1214平方公里</b>（<b>468.9平方</b>英里），其中425平方公里（164.1平方英里）为水域，789平方公里（304.8平方英里）为陆地。. 纽约市地处美国东北纽约州东南哈德森河口，大约在华盛顿特区和波士顿中间位置，紧邻的哈德逊河让纽约市享有运航之便，使其 ... <b>阿帕拉契高原区是</b>纽约州面积最大的地理区域，几乎涵盖了该州南方的大部分地区。纽约州最主要的河流有 哈德逊河、莫华克河、圣罗伦斯河、以及尼加拉河。 [1] [4] 作为世界经济和国际金融<b>的</b>神经中枢之一，<b>纽约</b>占据了区域内<b>的</b>核心地位，而位于波士顿郊区<b>的</b>128号公路两侧则聚集了数以千计<b>的</b>研究机构和高科技企业，被称为&quot;美国东海岸<b>的</b>硅谷&quot;。 <b>纽约面积多大纽约</b>是世界特大城市之一，美国最大<b>的</b>金融、商业、贸易和文化中心。 位于美国东北部哈得孙河注入大西洋<b>的</b>河口处。 市区<b>面积</b>945平方公里，其中水面168平方公里。 <b>纽约多大</b>，最长多少？<b>面积</b>多少？<b>纽约</b>（New York）是美国第一<b>大</b>都市和第一<b>大</b>商港，它不仅是美国<b>的</b>金融中心，也是全世界金融中心之一。<b>纽约</b>位于<b>纽约</b>州东南哈得孙河口，濒临大西洋。它由五个区组成：曼哈顿、布鲁克林、 截至2017年，<b>纽约</b>市人口约851万人，居住在789平方公里<b>的</b>土地上。而<b>纽约大</b>都市圈则<b>有</b>2000万人左右，仅次于东京、墨西哥城、孟买，位居世界第四位。2017年，<b>纽约</b>地区生产总值已达到9007亿美元。扩展资料： <b>纽约的的</b>地理环境： 1、地形地貌 根据

In [6]:
# Disabled alternative backend: load the model at CHAT_MODEL_PATH in-process
# with transformers and wrap it in a HuggingFacePipeline named `local_llm`.
# The whole cell is commented out; `local_llm` is instead created as a
# ChatOpenAI client in a later cell.
# chat_model = AutoModelForCausalLM.from_pretrained(
#     CHAT_MODEL_PATH,
#     # load_in_8bit=True,
#     device_map='auto',
#     torch_dtype=torch.bfloat16,
# )
# tokenizer = AutoTokenizer.from_pretrained(CHAT_MODEL_PATH)
# streamer = TextStreamer(tokenizer)
# pipe = pipeline(
#     "text-generation",
#     model=chat_model,
#     tokenizer=tokenizer,
#     max_length=4096,
#     truncation=True,
#     repetition_penalty=1.2,
#     do_sample=True,
#     temperature=0.6,
#     top_p=0.95,
#     streamer=streamer,
#     return_full_text=False,
# )
# local_llm = HuggingFacePipeline(pipeline=pipe)

In [7]:
class MyStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
    """Stdout streaming handler that emits a blank-line separator when an LLM call ends."""

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Write two newlines to stdout and flush it.

        Args:
            response: Result of the finished LLM call (not inspected here).
            **kwargs: Extra callback arguments (ignored).
        """
        out = sys.stdout
        out.write("\n" * 2)
        out.flush()

    # Disabled debugging hook: would print the chat messages sent to the model.
    # def on_chat_model_start(
    #     self,
    #     serialized: Dict[str, Any],
    #     messages: List[List[BaseMessage]],
    #     **kwargs: Any,
    # ) -> None:
    #     sys.stdout.write(f"{messages}\n\n")
    #     sys.stdout.flush()

# Callback handlers attached to the chat model.
callbacks = [MyStreamingStdOutCallbackHandler()]

# Chat model reached through an OpenAI-compatible HTTP API on localhost:11434.
# NOTE(review): the API key is a placeholder string; presumably the local
# server does not validate it — confirm.
local_llm = ChatOpenAI(
    openai_api_base="http://localhost:11434/v1/",
    openai_api_key="your-api-key",
    model_name="llama-3-chinese-8b-instruct-v3-f16",
    temperature=0.6,
    callbacks=callbacks,
    streaming=bool(callbacks),  # stream only when at least one handler is registered
)

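# With LangChain's built-in bing_search tool, the model always writes the tool key as `Bing Search`, so the action name fails to match.
# Passing a custom tool to load_tools raises an error; the officially suggested workaround is, surprisingly, to append it to the list afterwards.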

In [8]:
# tools = load_tools(["bing-search", "llm-math"], llm=local_llm)

# Custom search tool named "Search" that delegates to the Bing wrapper.
search_tool = Tool(
    name="Search",
    func=search.run,
    description=(
        "A wrapper around Search. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    ),
)

# Built-in llm-math tool first, with the custom search tool appended after it.
tools = [*load_tools(["llm-math"], llm=local_llm), search_tool]

In [9]:
# Display the assembled tool list (the llm-math Calculator plus the custom Search tool).
tools

[Tool(name='Calculator', description='Useful for when you need to answer questions about math.', func=<bound method Chain.run of LLMMathChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['question'], template='Translate a math problem into a expression that can be executed using Python\'s numexpr library. Use the output of running this code to answer the question.\n\nQuestion: ${{Question with math problem.}}\n```text\n${{single line mathematical expression that solves the problem}}\n```\n...numexpr.evaluate(text)...\n```output\n${{Output of running the code}}\n```\nAnswer: ${{Answer}}\n\nBegin.\n\nQuestion: What is 37593 * 67?\n```text\n37593 * 67\n```\n...numexpr.evaluate("37593 * 67")...\n```output\n2518731\n```\nAnswer: 2518731\n\nQuestion: 37593^(1/5)\n```text\n37593**(1/5)\n```\n...numexpr.evaluate("37593**(1/5)")...\n```output\n8.222831614237718\n```\nAnswer: 8.222831614237718\n\nQuestion: {question}\n'), llm=ChatOpenAI(callbacks=[<__main__.MyStreamingStdOutCallback

In [10]:
# Zero-shot ReAct agent built without agent_kwargs, i.e. with the library's
# stock prompt; used to inspect the default template.
agent = initialize_agent(
    tools=tools,
    llm=local_llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Disabled sample run (query: "What is the difference between the areas of
# Beijing and New York?").
# agent.run("北京的面积和纽约的面积差是多少？")

  warn_deprecated(


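# The Llama 3 model cannot stop generating unless its own chat template is used.
# Chinese questions tend to make the model answer with Chinese (full-width) punctuation, so LangChain fails to match `Action: `.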

In [11]:
# Display the prompt template string held by the current agent's LLM chain.
agent.agent.llm_chain.prompt.template

'Answer the following questions as best you can. You have access to the following tools:\n\nCalculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.\nSearch(query: str) -> str - A wrapper around Search. Useful for when you need to answer questions about current events. Input should be a search query.\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [Calculator, Search]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original i

In [12]:
# Custom prompt pieces that embed Llama 3 special tokens
# (<|begin_of_text|>, <|start_header_id|>, <|eot_id|>) into the ReAct prompt.

# System header plus the sentence that introduces the tool list.
PREFIX = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>Answer the following questions as best you can. You have access to the following tools:"""

# Output format the model must follow. `{tool_names}` is filled in from the
# `tools` list when the agent prompt is built, so the listed actions cannot
# drift from the tools actually passed in (previously hard-coded as
# "Calculator, Search", which is what the placeholder renders to for the
# current tools).
FORMAT_INSTRUCTIONS = """If you can give the final answer:
Final Answer: the final answer to the original input question.
After the final answer, you can <|eot_id|><|eot_id|>

If you can not give the final answer:
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action.
After Action Input, you can <|eot_id|><|eot_id|>"""

# User turn carrying the question, followed by the assistant header and the
# scratchpad placeholder. `{input}` and `{agent_scratchpad}` are prompt
# variables and must be kept.
SUFFIX = """<|start_header_id|>user<|end_header_id|>: Begin!

Question: {input}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>:Thought: {agent_scratchpad}"""

# Rebuild the agent with the custom prompt. handle_parsing_errors=True is
# passed so that replies the output parser cannot parse do not abort the run.
# NOTE(review): `stop` is forwarded as an extra executor keyword argument;
# nothing in this notebook shows it reaching the LLM call — confirm it takes
# effect.
agent = initialize_agent(
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    tools=tools,
    llm=local_llm,
    agent_kwargs={
        'prefix': PREFIX,
        'format_instructions': FORMAT_INSTRUCTIONS,
        'suffix': SUFFIX,
    },
    verbose=True,
    handle_parsing_errors=True,
    stop=['<|eot_id|>', 'Observation: '],
)

In [13]:
# Display the prompt template of the rebuilt agent to check the custom prefix, format instructions and suffix.
agent.agent.llm_chain.prompt.template

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>Answer the following questions as best you can. You have access to the following tools:\n\nCalculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.\nSearch(query: str) -> str - A wrapper around Search. Useful for when you need to answer questions about current events. Input should be a search query.\n\nIf you can give the final answer:\nFinal Answer: the final answer to the original input question.\nAfter the final answer, you can <|eot_id|><|eot_id|>\n\nIf you can not give the final answer:\nThought: you should always think about what to do\nAction: the action to take, should be one of [Calculator, Search]\nAction Input: the input to the action.\nAfter Action Inpu

In [14]:
# agent.run("How many people are there in the district where Jinling Middle School is located?")
# Ask the same question in Chinese. `run` is deprecated in favour of `invoke`,
# which takes the inputs as a mapping and returns a dict; index "output" so
# the cell still displays the final answer string.
agent.invoke({"input": "金陵中学所在的区有多少人?"})["output"]

  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
The question is in Chinese. I will try to translate it and determine whether I can give the final answer or need to take an action.

Translation: "How many people are there in the district where Jinling Middle School is located?"

Based on my knowledge, Jinling Middle School is located in Nanjing, Jiangsu Province, China. According to the 2020 census data of Nanjing City, there are over 8 million residents living in the urban area of Nanjing.

However, I cannot give a definitive answer as "区" can have different meanings depending on the context and regional differences. The term could refer to different levels such as neighborhoods (社区区), districts (行政区), or townships/cities (县市区).

Action: Search
Action Input: number of residents in Nanjing city, Jiangsu Province, China

[32;1m[1;3mThe question is in Chinese. I will try to translate it and determine whether I can give the final answer or need to take an action.

Translation: "How many 

'Over 8 million people are permanently living in the urban area of Nanjing, Jiangsu Province, China.'