The .with_structured_output() method

In [1]:
# ! pip install -qU langchain-openai

In [2]:
import getpass
import os

os.environ["http_proxy"] = "http://localhost:7890"
os.environ["https_proxy"] = "http://localhost:7890"

openai_api_key=os.environ["OPENAI_API_KEY"]
print(openai_api_key)

os.environ["OPENAI_API_BASE"] ="https://api.zhizengzeng.com/v1/"

sk-zk224a777126590fa5988d720e1c413bd6866854f68d0768


In [3]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [4]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel,Field

In [5]:
class Joke(BaseModel):
    """Joke is tell user"""
    setup: str =Field(description="the setpu of the joke")
    punchline : str = Field(description="the punchile to the joke")
    rating : Optional[int] =Field(description="How funny the joke is,from 1 to 10")

structured_llm=llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")


Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=8)

json_schema

In [6]:
json_schema={
    "title":"Joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
        },
    },
    "required": ["setup", "punchline"],
    }
structured_llm = llm.with_structured_output(json_schema)
structured_llm.invoke("Tell me a joke about cats")

{'setup': 'Why was the cat sitting on the computer?',
 'punchline': 'To keep an eye on the mouse!',
 'rating': 8}

In [7]:
from typing import Union

class Conversationalresponse(BaseModel):
    """Respond in a conversational manner. Be kind and helpful."""
    response : str = Field(description="A conversational response to the user's query")

class Response(BaseModel):
    output : Union[Joke, Conversationalresponse]

structured_llm = llm.with_structured_output(Response)
structured_llm.invoke("Tell me a joke about cats")

Response(output=Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=8))

In [8]:
structured_llm.invoke("How are you today?")

Response(output=Conversationalresponse(response="I'm just a computer program, so I don't have feelings, but I'm here to help you with any questions you have. How can I assist you today?"))

Streaming 流式输出

In [9]:
structured_llm = llm.with_structured_output(json_schema)

for chunk in structured_llm.stream("Tell me a joke about cats"):
    print(chunk)

{}
{'setup': ''}
{'setup': 'Why'}
{'setup': 'Why was'}
{'setup': 'Why was the'}
{'setup': 'Why was the cat'}
{'setup': 'Why was the cat sitting'}
{'setup': 'Why was the cat sitting on'}
{'setup': 'Why was the cat sitting on the'}
{'setup': 'Why was the cat sitting on the computer'}
{'setup': 'Why was the cat sitting on the computer?'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': ''}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye on'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye on the'}
{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye on the mouse

Few-shot prompting 少量提示

In [10]:
from langchain_core.prompts import ChatPromptTemplate

system = """
You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") and the final punchline (the response to "<setup> who?").

Here are some examples of jokes:

example_user: Tell me a joke about planes
example_assistant: {{"setup": "Why don't planes ever get tired?", "punchline": "Because they have rest wings!", "rating": 2}}

example_user: Tell me another joke about planes
example_assistant: {{"setup": "Cargo", "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!", "rating": 10}}

example_user: Now about caterpillars
example_assistant: {{"setup": "Caterpillar", "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!", "rating": 5}}
"""

prompt = ChatPromptTemplate.from_messages([("system",system),("human","{input}")])

few_shot_structured_llm = prompt | structured_llm
few_shot_structured_llm.invoke("what's something funny about woodpeckers")

{'setup': 'Wooden',
 'punchline': "Wooden you like to know why woodpeckers don't get headaches?",
 'rating': 8}

In [11]:
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

examples = [
    HumanMessage("Tell me a joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Why don't planes ever get tired?",
                    "punchline": "Because they have rest wings!",
                    "rating": 2,
                },
                "id": "1",
            }
        ],
    ),
    # Most tool-calling models expect a ToolMessage(s) to follow an AIMessage with tool calls.
    ToolMessage("", tool_call_id="1"),
    # Some models also expect an AIMessage to follow any ToolMessages,
    # so you may need to add an AIMessage here.
    HumanMessage("Tell me another joke about planes", name="example_user"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Cargo",
                    "punchline": "Cargo 'vroom vroom', but planes go 'zoom zoom'!",
                    "rating": 10,
                },
                "id": "2",
            }
        ],
    ),
    ToolMessage("", tool_call_id="2"),
    HumanMessage("Now about caterpillars", name="example_user"),
    AIMessage(
        "",
        tool_calls=[
            {
                "name": "joke",
                "args": {
                    "setup": "Caterpillar",
                    "punchline": "Caterpillar really slow, but watch me turn into a butterfly and steal the show!",
                    "rating": 5,
                },
                "id": "3",
            }
        ],
    ),
    ToolMessage("", tool_call_id="3"),
]

In [12]:
system = """
You are a hilarious comedian. Your specialty is knock-knock jokes. \
Return a joke which has the setup (the response to "Who's there?") \
and the final punchline (the response to "<setup> who?").
"""

prompt = ChatPromptTemplate.from_messages (
    [("system", system), ("placeholder", "{examples}"), ("human", "{input}")]
)

few_shot_structured_llm = prompt | structured_llm
few_shot_structured_llm.invoke({"input":"crocodiles","examples": examples})

{'setup': 'Crocodile',
 'punchline': "Crocodile your best friend, he'll always have your back!",
 'rating': 7}

(Advanced) Specifying the method for structuring outputs 指定构造输出

In [13]:
structured_llm = llm.with_structured_output(Joke,method="json_mode")

structured_llm.invoke(
    "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
)

Joke(setup='Why was the cat sitting on the computer?', punchline='He wanted to keep an eye on the mouse!', rating=None)

(Advanced) Raw outputs 原始输出
传递 include_raw=True 来避免引发异常并自行处理原始输出。

In [14]:
structured_llm = llm.with_structured_output(Joke, include_raw=True)

structured_llm.invoke(
    "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
)

{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8CmxFZ1e4mV3e7g9hSXS8pgu', 'function': {'arguments': '{"setup":"Why was the cat sitting on the computer?","punchline":"To keep an eye on the mouse!"}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 115, 'total_tokens': 141}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-63a751b5-2039-4ac7-a110-1fe118e03e57-0', tool_calls=[{'name': 'Joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye on the mouse!'}, 'id': 'call_8CmxFZ1e4mV3e7g9hSXS8pgu'}], usage_metadata={'input_tokens': 115, 'output_tokens': 26, 'total_tokens': 141}),
 'parsed': Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None),
 'parsing_error': None}

Prompting and parsing model outputs directly
对于不支持tool calling or JSON mode的模型，直接提示模型使用特定格式，并使用输出解析器从原始模型输出中提取结构化响应。


Using PydanticOutputParser
通过解析器上的方法直接将 format_instructions 添加到提示中


In [15]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]

# Set up a parser 设置解析器
parser = PydanticOutputParser(pydantic_object=People)

# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Anawer the user the query. Wrap the output in 'json' tag\n {format_instructions}"),
            ("human","{query}")
        
    ]
).partial(format_instructions=parser.get_format_instructions())              ###


In [16]:
query = "Anna is 23 years old and she is 6 feet tall"

print(prompt.invoke(query).to_string())   #to_string()转为字符串格式

System: Anawer the user the query. Wrap the output in 'json' tag
 The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"description": "Identifying information about all people in a text.", "properties": {"people": {"title": "People", "type": "array", "items": {"$ref": "#/definitions/Person"}}}, "required": ["people"], "definitions": {"Person": {"title": "Person", "description": "Information about a person.", "type": "object", "properties": {"name": {"title": "Name", "description": "The name of the person", "type": "string"}, "height_in_meters": {"title": "Height In Meters", "description": "The 

In [17]:
chain = prompt | llm | parser

chain.invoke({"query":query})

People(people=[Person(name='Anna', height_in_meters=1.8288)])

Custom Parsing 自定义解析

In [18]:
import json
import re
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [19]:
class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]


# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that  "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=People.schema())

In [20]:
# 自定义解析器函数
# Custom parser
def extract_json(message: AIMessage) -> List[dict]:
    """Extracts JSON content from a string where JSON is embedded between ```json and ``` tags.

    Parameters:
        text (str): The text containing the JSON content.

    Returns:
        list: A list of extracted JSON strings.
    """
    text = message.content
    # 定义正则表达式模式来匹配 JSON 块
    # Define the regular expression pattern to match JSON blocks
    pattern = r"```json(.*?)```"            #包含在三重反引号块中，并以 json 开头的内容。

    # Find all non-overlapping matches of the pattern in the string
    # 去重
    matches = re.findall(pattern , text, re.DOTALL)

    # Return the list of matched JSON strings, stripping any leading or trailing whitespace
    # strip()去除首尾空格
    try:
        return [json.loads(match.strip()) for match in matches]
    except Exception:
        raise ValueError(f"Failed to parse: {message}")

In [21]:
# 不调用解析器
query = "Anna is 23 years old and she is 6 feet tall"

print(prompt.format_prompt(query=query).to_string())

System: Answer the user query. Output your answer as JSON that  matches the given schema: ```json
{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}
```. Make sure to wrap the answer in ```json and ``` tags
Human: Anna is 23 years old and she is 6 feet tall


In [22]:

#调用解析器
chain = prompt | llm | extract_json
chain.invoke({"query":query})            

#输出结果有点问题

[{'title': 'People',
  'description': 'Identifying information about all people in a text.',
  'type': 'object',
  'properties': {'people': {'title': 'People',
    'type': 'array',
    'items': {'$ref': '#/definitions/Person'}}},
  'required': ['people'],
  'definitions': {'Person': {'title': 'Person',
    'description': 'Information about a person.',
    'type': 'object',
    'properties': {'name': {'title': 'Name',
      'description': 'The name of the person',
      'type': 'string'},
     'height_in_meters': {'title': 'Height In Meters',
      'description': 'The height of the person expressed in meters.',
      'type': 'number'}},
    'required': ['name', 'height_in_meters']}}}]