In [1]:
import numpy as np
import pandas as pd

## 读入数据

In [2]:
dataset = pd.read_csv('titanic.csv')
dataset.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## 代码解释器工具测试

In [4]:
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

In [5]:
tool = PythonAstREPLTool(locals={"df": dataset})
tool.invoke("df['Age'].mean()")

np.float64(29.69911764705882)

In [6]:
dataset['Age'].mean()

np.float64(29.69911764705882)

## LangChain工作流并绑定内置工具

In [7]:
import os
from dotenv import load_dotenv 
load_dotenv(override=True)

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import init_chat_model
from langchain_core.output_parsers import StrOutputParser

In [8]:
model  = init_chat_model(model="deepseek-chat", model_provider="deepseek")

In [9]:
llm_with_tools = model.bind_tools([tool])

In [10]:
response = llm_with_tools.invoke(
    "我有一张表，名为'dataset'，请帮我计算Fare字段的均值。"
)
response

AIMessage(content="我来帮您计算'dataset'表中'Fare'字段的均值。首先让我查看一下数据表的结构和内容。", additional_kwargs={'tool_calls': [{'id': 'call_00_saASh8kWx8ZqaTxOpZjjfKBG', 'function': {'arguments': '{"query": "import pandas as pd\\n\\n# 假设数据表已经加载为名为\'dataset\'的DataFrame\\n# 首先检查数据表是否存在以及Fare字段的情况\\ntry:\\n    # 查看数据表的基本信息\\n    print(\\"数据表形状:\\", dataset.shape)\\n    print(\\"\\\\n数据表列名:\\")\\n    print(dataset.columns.tolist())\\n    print(\\"\\\\nFare字段的前几行:\\")\\n    print(dataset[\'Fare\'].head())\\n    print(\\"\\\\nFare字段的描述性统计:\\")\\n    print(dataset[\'Fare\'].describe())\\nexcept NameError:\\n    print(\\"错误: 未找到名为\'dataset\'的数据表\\")\\nexcept KeyError:\\n    print(\\"错误: 数据表中没有\'Fare\'字段\\")\\nexcept Exception as e:\\n    print(f\\"发生错误: {e}\\")"}', 'name': 'python_repl_ast'}, 'type': 'function', 'index': 0}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 246, 'prompt_tokens': 212, 'total_tokens': 458, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_toke

In [11]:
from langchain_core.output_parsers.openai_tools import JsonOutputKeyToolsParser

In [12]:
parser = JsonOutputKeyToolsParser(key_name=tool.name, first_tool_only=True)

In [13]:
llm_chain = llm_with_tools | parser

In [14]:
llm_chain.invoke("我有一张表，名为'df'，请帮我计算Age字段的均值。")

{'query': "df['Age'].mean()"}

## 接入tool

In [23]:
system = f"""
你可以访问一个名为 `df` 的 pandas 数据框，请根据用户提出的问题，编写 Python 代码来回答。只返回代码，不返回其他内容。只允许使用 pandas 和内置库。
"""

In [24]:
prompt = ChatPromptTemplate([
    ("system", system),
    ("user", "{question}")
])

In [25]:
chain = prompt | llm_with_tools | parser | tool
chain.invoke({"question": "请计算Age字段的均值。"})

np.float64(29.69911764705882)

In [None]:
chain.invoke({"question": "请计算Age字段和Fare字段的相关系数。"})

np.float64(0.09606669176903912)

In [28]:
chain.invoke({"question": "请按照Pclass 字段计算Age字段的均值。"})

Pclass
1    38.233441
2    29.877630
3    25.140620
Name: Age, dtype: float64

## 打印中间结果

In [30]:
from langchain_core.runnables import RunnableLambda

def code_print(res):
    print("即将运行Python代码:", res['query'])
    return res

print_node = RunnableLambda(code_print)

In [31]:
print_code_chain = prompt | llm_with_tools | parser | print_node | tool
print_code_chain.invoke({"question": "请计算Age字段和Fare字段的相关系数。"})

即将运行Python代码: import pandas as pd

# 计算Age字段和Fare字段的相关系数
correlation = df['Age'].corr(df['Fare'])
print(f"Age和Fare的相关系数为: {correlation}")


'Age和Fare的相关系数为: 0.09606669176903888\n'

# 自定义外部工具

In [1]:
import os
from dotenv import load_dotenv 
load_dotenv(override=True)

OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")

In [2]:
import requests,json

In [3]:
def get_weather(loc):
    """
    查询即时天气函数
    :param loc: 必要参数，字符串类型，用于表示查询天气的具体城市名称，\
    注意，中国的城市需要用对应城市的英文名称代替，例如如果需要查询北京市天气，则loc参数需要输入'Beijing'；
    :return：OpenWeather API查询即时天气的结果，具体URL请求地址为：https://api.openweathermap.org/data/2.5/weather\
    返回结果对象类型为解析之后的JSON格式对象，并用字符串形式进行表示，其中包含了全部重要的天气信息
    """
    # Step 1.构建请求
    url = "https://api.openweathermap.org/data/2.5/weather"

    # Step 2.设置查询参数
    params = {
        "q": loc,               
        "appid": os.getenv("OPENWEATHER_API_KEY"),    # 输入API key
        "units": "metric",            # 使用摄氏度而不是华氏度
        "lang":"zh_cn"                # 输出语言为简体中文
    }

    # Step 3.发送GET请求
    response = requests.get(url, params=params)
    
    # Step 4.解析响应
    data = response.json()
    return json.dumps(data)

In [5]:
get_weather("hangzhou")

'{"coord": {"lon": 120.1614, "lat": 30.2937}, "weather": [{"id": 800, "main": "Clear", "description": "\\u6674", "icon": "01n"}], "base": "stations", "main": {"temp": 17.95, "feels_like": 17.76, "temp_min": 17.95, "temp_max": 17.95, "pressure": 1025, "humidity": 75, "sea_level": 1025, "grnd_level": 1022}, "visibility": 10000, "wind": {"speed": 1.94, "deg": 4, "gust": 4.35}, "clouds": {"all": 2}, "dt": 1761317352, "sys": {"type": 1, "id": 9651, "country": "CN", "sunrise": 1761257236, "sunset": 1761297576}, "timezone": 28800, "id": 1808926, "name": "Hangzhou", "cod": 200}'

## 封装成工具

In [6]:
from langchain_core.tools import tool

In [7]:
@tool
def get_weather(loc):
    """
    查询即时天气函数
    :param loc: 必要参数，字符串类型，用于表示查询天气的具体城市名称，\
    注意，中国的城市需要用对应城市的英文名称代替，例如如果需要查询北京市天气，则loc参数需要输入'Beijing'；
    :return：OpenWeather API查询即时天气的结果，具体URL请求地址为：https://api.openweathermap.org/data/2.5/weather\
    返回结果对象类型为解析之后的JSON格式对象，并用字符串形式进行表示，其中包含了全部重要的天气信息
    """
    # Step 1.构建请求
    url = "https://api.openweathermap.org/data/2.5/weather"

    # Step 2.设置查询参数
    params = {
        "q": loc,               
        "appid": os.getenv("OPENWEATHER_API_KEY"),    # 输入API key
        "units": "metric",            # 使用摄氏度而不是华氏度
        "lang":"zh_cn"                # 输出语言为简体中文
    }

    # Step 3.发送GET请求
    response = requests.get(url, params=params)
    
    # Step 4.解析响应
    data = response.json()
    return json.dumps(data)

In [8]:
print(get_weather.name)
print(get_weather.description)
print(get_weather.args)

get_weather
查询即时天气函数
:param loc: 必要参数，字符串类型，用于表示查询天气的具体城市名称，    注意，中国的城市需要用对应城市的英文名称代替，例如如果需要查询北京市天气，则loc参数需要输入'Beijing'；
:return：OpenWeather API查询即时天气的结果，具体URL请求地址为：https://api.openweathermap.org/data/2.5/weather    返回结果对象类型为解析之后的JSON格式对象，并用字符串形式进行表示，其中包含了全部重要的天气信息
{'loc': {'title': 'Loc'}}


In [9]:
from langchain.chat_models import init_chat_model

# 初始化模型
model = init_chat_model("deepseek-chat", model_provider="deepseek")

In [10]:
# 定义 天气查询 工具函数
tools = [get_weather]

# 将工具绑定到模型
llm_with_tools = model.bind_tools(tools)

In [11]:
response = llm_with_tools.invoke("你好， 请问杭州的天气怎么样？")

print(response)

content='我来帮您查询杭州的天气情况。' additional_kwargs={'tool_calls': [{'id': 'call_00_fAeFff70ejt1xNCAiZvasSaG', 'function': {'arguments': '{"loc": "Hangzhou"}', 'name': 'get_weather'}, 'type': 'function', 'index': 0}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 251, 'total_tokens': 276, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}, 'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 251}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_ffc7281d48_prod0820_fp8_kvcache', 'id': 'a5d535bf-d141-4efa-a277-c26d2c0b5e22', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None} id='run--317f2959-3cf0-42f0-95d6-2bf050199e27-0' tool_calls=[{'name': 'get_weather', 'args': {'loc': 'Hangzhou'}, 'id': 'call_00_fAeFff70ejt1xNCAiZvasSaG', 'type': 'tool_call'}] usage_metadata={'input_tokens': 251, 'output_tokens': 25, 'total_tokens': 276, 'input_token_details': {'cache_read': 

In [12]:
response.additional_kwargs

{'tool_calls': [{'id': 'call_00_fAeFff70ejt1xNCAiZvasSaG',
   'function': {'arguments': '{"loc": "Hangzhou"}', 'name': 'get_weather'},
   'type': 'function',
   'index': 0}],
 'refusal': None}

In [13]:
from langchain_core.output_parsers.openai_tools import JsonOutputKeyToolsParser

In [14]:
parser = JsonOutputKeyToolsParser(key_name=get_weather.name, first_tool_only=True)

In [15]:
llm_chain = llm_with_tools | parser

In [16]:
llm_chain.invoke("请问杭州今天天气如何？")

{'loc': 'Hangzhou'}

In [18]:
get_weather_chain = llm_with_tools | parser | get_weather

In [19]:
get_weather_chain.invoke("请问杭州今天天气如何？")

'{"coord": {"lon": 120.1614, "lat": 30.2937}, "weather": [{"id": 800, "main": "Clear", "description": "\\u6674", "icon": "01n"}], "base": "stations", "main": {"temp": 17.95, "feels_like": 17.76, "temp_min": 17.95, "temp_max": 17.95, "pressure": 1025, "humidity": 75, "sea_level": 1025, "grnd_level": 1022}, "visibility": 10000, "wind": {"speed": 1.94, "deg": 4, "gust": 4.35}, "clouds": {"all": 2}, "dt": 1761317352, "sys": {"type": 1, "id": 9651, "country": "CN", "sunrise": 1761257236, "sunset": 1761297576}, "timezone": 28800, "id": 1808926, "name": "Hangzhou", "cod": 200}'

In [20]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt 模板
output_prompt = PromptTemplate.from_template(
    """你将收到一段 JSON 格式的天气数据，请用简洁自然的方式将其转述给用户。
以下是天气 JSON 数据：

```json
{weather_json}
```

请将其转换为中文天气描述，例如：
“北京当前天气晴，气温为 23°C，湿度 58%，风速 2.1 米/秒。”
只返回一句话描述，不要其他说明或解释。"""
)

In [21]:
output_chain = output_prompt | model | StrOutputParser()

In [22]:
weather_json = '{"coord": {"lon": 120.1614, "lat": 30.2937}, "weather": [{"id": 800, "main": "Clear", "description": "\\u6674", "icon": "01n"}], "base": "stations", "main": {"temp": 17.95, "feels_like": 17.76, "temp_min": 17.95, "temp_max": 17.95, "pressure": 1025, "humidity": 75, "sea_level": 1025, "grnd_level": 1022}, "visibility": 10000, "wind": {"speed": 1.94, "deg": 4, "gust": 4.35}, "clouds": {"all": 2}, "dt": 1761317352, "sys": {"type": 1, "id": 9651, "country": "CN", "sunrise": 1761257236, "sunset": 1761297576}, "timezone": 28800, "id": 1808926, "name": "Hangzhou", "cod": 200}'

result = output_chain.invoke({"weather_json": weather_json})
print(result)

杭州当前天气晴，气温为 18°C，湿度 75%，风速 1.94 米/秒。


In [23]:
full_chain = get_weather_chain | output_chain
response = full_chain.invoke("请问杭州今天的天气如何？")
print(response)

杭州当前天气晴，气温为18°C，湿度75%，风速1.94米/秒。
