In [1]:
from langchain_openai import ChatOpenAI

# 支持本地部署的大模型
# llm = ChatOpenAI(base_url="http://127.0.0.1:8000/v1/", model="gpt-3.5-turbo") 
llm_mini = ChatOpenAI(model="gpt-4o-mini")
llm_turbo = ChatOpenAI(model="gpt-3.5-turbo")

* base_url 与 api_key 参数已添加到系统环境变量中，故无需显式传参

In [2]:
from typing import List
from typing import Optional

from pydantic import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

另外一种 Joke 的写法：

In [None]:
from typing import Optional

from typing_extensions import Annotated, TypedDict


# TypedDict
class Joke(TypedDict):
    """Joke to tell user."""

    setup: Annotated[str, ..., "The setup of the joke"]

    # Alternatively, we could have specified setup as:

    # setup: str                    # no default, no description
    # setup: Annotated[str, ...]    # no default, no description
    # setup: Annotated[str, "foo"]  # default, no description

    punchline: Annotated[str, ..., "The punchline of the joke"]
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]

若 llm 是 `gpt-3.5-turbo`，可成功得到输出，若是`gpt-4o-mini` 则会报错。

In [9]:
llm_turbo.with_structured_output(Joke).invoke("Tell me a joke about cats")



Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=7)

In [None]:
## 会报错
# llm_mini.with_structured_output(Joke).invoke("Tell me a joke about cats")

In [13]:
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}

In [14]:
llm_turbo.with_structured_output(json_schema).invoke("Tell me a joke about cats")



{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'Because it wanted to keep an eye on the mouse!'}

In [None]:
## 依然报错
# llm_mini.with_structured_output(json_schema).invoke("Tell me a joke about cats")

In [None]:
llm_turbo.with_structured_output(json_schema).invoke("Tell me a joke about cats")

### 绑定多个结构化输出

In [22]:
from typing import Union

class Man(BaseModel):
    """
    男人的信息
    """

    name: str = Field(description="姓名")
    age: str = Field(description="年龄")
    interest: str = Field(description="兴趣爱好")
    colthing: str = Field(description="上身衣服与下身衣服")
    height: str = Field(description="身高")


class Woman(BaseModel):
    """
    女人的信息
    """

    name: str = Field(description="姓名")
    age: str = Field(description="年龄")
    interest: str = Field(description="兴趣爱好")
    colthing: str = Field(description="上身衣服与下身衣服")
    height: str = Field(description="身高")



class Person(BaseModel):
    final_output: Union[Man, Woman]

In [23]:
llm_turbo.with_structured_output(Person).invoke("帮我生成一个男人的信息")



Person(final_output=Man(name='张伟', age='30', interest='运动，旅行，读书', colthing='白色衬衫，深色牛仔裤', height='175cm'))

In [24]:
llm_turbo.with_structured_output(Person).invoke("帮我生成一个女人的信息")



Person(final_output=Man(name='李华', age='28', interest='阅读，旅行，烹饪', colthing='白色衬衫和黑色裙子', height='165cm'))

## parse

为了解决一些模型不支持结构化输出

In [None]:
class Man(BaseModel):
    """
    男人的信息
    """
    name: str = Field(description="姓名")
    age: str = Field(description="年龄")
    interest: str = Field(description="兴趣爱好")
    colthing: str = Field(description="上身衣服与下身衣服")
    height: str = Field(description="身高")

In [25]:
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template(
    "hello {name}"
)

In [27]:
prompt_template.invoke({"name": "world"})

StringPromptValue(text='hello world')

In [29]:
Man

__main__.Man

In [32]:
from langchain.output_parsers import PydanticOutputParser

from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template("""
    生成一个男人的信息。以json格式返回，包含姓名、年龄、兴趣爱好、上身衣服与下身衣服、身高。格式如下:
    ```json
    {{
        "name": "张三",
        "age": "20",
        "interest": "打篮球",
        "colthing": "白色T恤与黑色裤子",
        "height": "180cm"
    }}
    ```
    """.strip()
)
parser = PydanticOutputParser(pydantic_object=Man)
chain = prompt_template | llm_mini | parser
man_info = chain.invoke({})
if man_info:
    print(man_info)
else:
    print("没有返回结果")

name='李四' age='28' interest='旅游与摄影' colthing='蓝色衬衫与卡其色长裤' height='175cm'


In [33]:
man_info

Man(name='李四', age='28', interest='旅游与摄影', colthing='蓝色衬衫与卡其色长裤', height='175cm')

## Few-shot prompting

**写法一**，直接写入到提示词中：

In [42]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

system_msg = """You are a hilarious comedian. Your specialty is knock-knock jokes. 
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?")."""

examples = """
example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}
""".strip()

prompt = PromptTemplate.from_template(
"""
{system_msg}

Here are some examples of jokes:
{examples}

example_user: {input}
""".strip()
)

# prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{input}")])


langchain的提示词模板可以使用 `invoke`和`format` 进行提示词填充

查看填充完成后的提示词：

In [37]:
print(prompt.invoke({
    "system_msg": system_msg,
    "examples": examples,
    "input": "what's something funny about woodpeckers",
}).text)

You are a hilarious comedian. Your specialty is knock-knock jokes. 
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:
example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}
example_user: what's something funny about woodpeckers


In [38]:
print(
    prompt.format(
        system_msg=system_msg,
        examples=examples,
        input="what's something funny about woodpeckers",
    )
)

You are a hilarious comedian. Your specialty is knock-knock jokes. 
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:
example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}
example_user: what's something funny about woodpeckers


In [None]:
few_shot_chain1 = prompt | llm_mini.with_structured_output(Joke)
few_shot_chain1.invoke(
    {
        "system_msg": system_msg,
        "examples": examples,
        "input": "what's something funny about woodpeckers",
    }
)

Joke(setup='Woodpecker', punchline="Woodpecker knocking at your door? It's just trying to show you its new peck-formance!", rating=7)

没有 few_shot, gpt-4o-mini 模型会报错，加上 few-shot gpt-4o-mini 模型可以正常得到结果

**写法二**，`FewShotPromptTemplate`：

In [None]:
import json
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate

system_msg = """You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:""".strip()

# 定义格式化单个示例的 PromptTemplate
example_prompt = PromptTemplate(
    template="Q: {query}\nA: {{{answer}}}",
)

# 示例数据
examples = [
    {
        "query": "Tell me a joke about planes",
        "answer": {"setup": "Why don\'t planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2},
    },
    {
        "query": "Tell me another joke about planes",
        "answer": {"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10},
    }
]

for example in examples:
    example["answer"] = json.dumps(example["answer"])

# 构建 FewShotPromptTemplate
few_shot_prompt2 = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    prefix=system_msg,
    suffix="Q: {input}\nA:",
)

In [72]:
print(few_shot_prompt2.invoke({"input": "Now about caterpillars"}).text)

You are a hilarious comedian. Your specialty is knock-knock jokes. Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

Q: Tell me a joke about planes
A: {"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}

Q: Tell me another joke about planes
A: {"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}

Q: Now about caterpillars
A:


值得注意的是 examples的answer是一个字典。为了不让langchain报错，针对 PromptTemplate 我是这些写的：

`"Q: {query}\nA: {{{answer}}}"`

使用了三个括号，把answer包住。大家记住`{{`是`{`的转义，然后再去理解三个括号就行了。

In [71]:
few_shot_chain2 = few_shot_prompt2 | llm_mini.with_structured_output(Joke)
few_shot_chain2.invoke(
    {
        "input": "what's something funny about woodpeckers",
    }
)

Joke(setup='Woodpecker', punchline="Woodpecker who's always knocking on wood for good luck!", rating=8)

## 文本分类的结构化输出

In [None]:
# from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [74]:
# diy 实现一个文本分类，经济、工业、产业等
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate

class TextCLS(BaseModel):
    """
    文本分类的结构化输出
    """

    keyword: List[str] = Field(description="问题中出现的与分类相关的关键词")
    reason: str = Field(description="分类的原因")
    label: str = Field(description="文本分类label")
    
schema = ["经济", "民生", "产业", "绿色发展", "军事", "其他"]

system_msg = "请你完成文本分类任务，按照要求完成关键词提取，输出分类原因与最终的类别。文本的类别是：{schema}"

# 定义格式化单个示例的 PromptTemplate
example_prompt = PromptTemplate(
    template="Q: {query}\nA: {answer}",
)

# 示例数据
examples = [
    {
        "query": "武汉市今年GDP上涨2%",
        "answer": '{{"keyword": ["GDP"], "reason": "GDP与经济相关", "label": "经济"}}',
    },
    {
        "query": "氢能产业园区的相关配套措施完善，园区内有很多氢能领域龙头企业",
        "answer": """{{
                "keyword": ["氢能产业园区", "氢能领域龙头企业"],
                "reason": "问题中的氢能产业园区与氢能领域龙头企业都与产业相关",
                "label": "产业",
            }}""".strip(),
    },
]

# 构建 FewShotPromptTemplate
few_shot_prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    prefix=system_msg,
    suffix="Q: {input}\nA:",
)

In [None]:
# 一次性传递全部参数
prompt = few_shot_prompt.invoke(
    {
        "input": "武汉市今年GDP上涨2%",
        "schema": schema,
    }
)
print(prompt.text)

请你完成文本分类任务，按照要求完成关键词提取，输出分类原因与最终的类别。文本的类别是：['经济', '民生', '产业', '绿色发展', '军事', '其他']

Q: 武汉市今年GDP上涨2%
A: {"keyword": ["GDP"], "reason": "GDP与经济相关", "label": "经济"}

Q: 氢能产业园区的相关配套措施完善，园区内有很多氢能领域龙头企业
A: {
                "keyword": ["氢能产业园区", "氢能领域龙头企业"],
                "reason": "问题中的氢能产业园区与氢能领域龙头企业都与产业相关",
                "label": "产业",
            }

Q: 武汉市今年GDP上涨2%
A:


提示词模板使用format与invoke方法来格式化字符串:

```python
prompt = PromptTemplate.from_template("{foo}{bar}")
prompt.invoke({"foo": "hello", "bar": "world"})
prompt.format(foo="hello",bar="world")
```

In [75]:
# 部分提示词: 设置文本分类的label，不用每一次都传递schema进去
partial_prompt = few_shot_prompt.partial(schema=schema)
partial_prompt.invoke({"input":"武汉市今年GDP上涨2%"})

StringPromptValue(text='请你完成文本分类任务，按照要求完成关键词提取，输出分类原因与最终的类别。文本的类别是：[\'经济\', \'民生\', \'产业\', \'绿色发展\', \'军事\', \'其他\']\n\nQ: 武汉市今年GDP上涨2%\nA: {"keyword": ["GDP"], "reason": "GDP与经济相关", "label": "经济"}\n\nQ: 氢能产业园区的相关配套措施完善，园区内有很多氢能领域龙头企业\nA: {\n                "keyword": ["氢能产业园区", "氢能领域龙头企业"],\n                "reason": "问题中的氢能产业园区与氢能领域龙头企业都与产业相关",\n                "label": "产业",\n            }\n\nQ: 武汉市今年GDP上涨2%\nA:')

In [62]:
chanin = partial_prompt | llm_mini.with_structured_output(TextCLS)

In [63]:
chanin.invoke("北京市今年的生产总值提高了5个百分点")

TextCLS(keyword=['生产总值'], reason='生产总值与经济相关，反映经济增长情况', label='经济')

In [64]:
chanin.invoke("今年的氢能产业园区发展迅速，很多企业都在投资氢能产业")

TextCLS(keyword=['氢能产业园区', '投资氢能产业'], reason='氢能产业园区的发展和企业投资都与产业相关', label='产业')

In [65]:
chanin.invoke("你爱我吗？")

TextCLS(keyword=['爱'], reason='文本为情感表达，与具体类别无关', label='其他')