In [1]:
from langchain_community.chat_models import ChatTongyi

# Shared Tongyi chat model instance; the chains built in the cells below all
# pipe through `chat`.
chat = ChatTongyi()

# PydanticOutputParser

In [6]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator


# Define your desired data structure.
class Joke(BaseModel):
    # Schema for a two-part joke: `setup` is the question, `punchline` the answer.
    # (Kept as a comment rather than a class docstring so the generated JSON
    # schema, and therefore the prompt, is unchanged.)
    setup: str = Field(description="开启一个笑话的问题")
    punchline: str = Field(description="解答笑话的答案")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        """Require ``setup`` to end with the full-width question mark "？".

        Args:
            field: The candidate value for ``setup``.

        Returns:
            The value unchanged when it is well formed.

        Raises:
            ValueError: If the value is empty or does not end with "？".
        """
        # str.endswith is False for "", so an empty setup is reported as a
        # ValueError (a normal validation failure) instead of the IndexError
        # that indexing field[-1] would raise.
        if not field.endswith("？"):
            raise ValueError("Badly formed question!")
        return field


# Build a parser for the Joke schema. Its format instructions are pre-filled into
# the prompt as a partial variable, so callers only need to supply `query`.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="根据用户的输入进行解答.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> chat model -> parser and run it once; the parsed result is the
# value displayed as this cell's output.
chain = prompt | chat | parser
chain.invoke({"query": "讲一个笑话"})

Joke(setup='为什么电脑永远不会感冒？', punchline='因为它有Windows（Windows，意为窗户，这里指电脑不会打开，所以不会受冷）')

In [2]:
# Display the instruction text that `parser` generates for its schema.
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "\\u5f00\\u542f\\u4e00\\u4e2a\\u7b11\\u8bdd\\u7684\\u95ee\\u9898", "type": "string"}, "punchline": {"title": "Punchline", "description": "\\u89e3\\u7b54\\u7b11\\u8bdd\\u7684\\u7b54\\u6848", "type": "string"}}, "required": ["setup", "punchline"]}\n```'

# JSON解析器

In [7]:
from langchain_core.output_parsers import JsonOutputParser

# Replace the parser with a JsonOutputParser built from the same Joke schema.
# No new prompt is created here: `prompt` is reused from the PydanticOutputParser
# cell, so that cell must have been executed first.
parser = JsonOutputParser(pydantic_object=Joke)

chain = prompt | chat | parser

chain.invoke({"query": "讲一个笑话"})

{'setup': '为什么电脑永远不会感冒？', 'punchline': '因为它有Windows（窗户）但是不开！'}

In [8]:
# Display the instruction text generated by whichever parser `parser` currently
# refers to (the JsonOutputParser when cells are run in file order).
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "\\u5f00\\u542f\\u4e00\\u4e2a\\u7b11\\u8bdd\\u7684\\u95ee\\u9898", "type": "string"}, "punchline": {"title": "Punchline", "description": "\\u89e3\\u7b54\\u7b11\\u8bdd\\u7684\\u7b54\\u6848", "type": "string"}}, "required": ["setup", "punchline"]}\n```'

# 自定义解析器
## Runnable Lambdas and Generators

In [11]:
from typing import Iterable

from langchain_core.messages import AIMessage, AIMessageChunk

def parse(ai_message: AIMessage) -> str:
    """Return the message's text with upper- and lower-case letters swapped."""
    reply_text = ai_message.content
    return reply_text.swapcase()


# Pipe the chat model straight into the plain function `parse`, making it the
# last step of the chain, then run the chain once on a short prompt.
chain = chat | parse
chain.invoke("hello")

'hELLO! hOW CAN i ASSIST YOU TODAY?'

## Runnable Generators

In [3]:
from langchain_core.runnables import RunnableGenerator


def streaming_parse(chunks: Iterable[AIMessageChunk]) -> Iterable[str]:
    """Lazily yield the case-swapped text of each incoming message chunk."""
    yield from (piece.content.swapcase() for piece in chunks)


# Wrap the generator function in a RunnableGenerator so it can be piped after
# the model. Note that this rebinds the name `streaming_parse` to the wrapper.
streaming_parse = RunnableGenerator(streaming_parse)

chain = chat | streaming_parse

# Print every transformed chunk as soon as it is produced, separated by "|".
for chunk in chain.stream("tell me about yourself in one sentence"):
    print(chunk, end="|", flush=True)

i| AM| A| LARGE LANGUAGE MODEL CREATED BY| aLIBABA cLOUD, DESIGNED TO ANSWER QUESTIONS AND| PROVIDE INFORMATION ON VARIOUS TOPICS.|

## 继承解析器基类

In [2]:
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseOutputParser


# The [bool] describes the parameterization of a generic base class:
# it states that `parse` returns a bool, i.e. either True or False.
class BooleanOutputParser(BaseOutputParser[bool]):
    """Parser that maps a yes/no style text reply onto a Python bool."""

    # Text accepted as True / False; compared case-insensitively after stripping.
    true_val: str = "YES"
    false_val: str = "NO"

    def parse(self, text: str) -> bool:
        """Convert ``text`` to a bool.

        Args:
            text: Raw text; surrounding whitespace and letter case are ignored.

        Returns:
            True if the text matches ``true_val``, False if it matches ``false_val``.

        Raises:
            OutputParserException: If the text matches neither value.
        """
        normalized = text.strip().upper()
        if normalized == self.true_val.upper():
            return True
        if normalized == self.false_val.upper():
            return False
        raise OutputParserException(
            f"BooleanOutputParser expected output value to either be "
            f"{self.true_val} or {self.false_val} (case-insensitive). "
            f"Received {normalized}."
        )

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "boolean_output_parser"

In [6]:
# Run the custom parser on a live reply. The prompt tells the model to answer
# with a bare "Yes" or "No", matching the parser's default accepted values.
parser = BooleanOutputParser()
chain = chat | parser
chain.invoke('回答我的问题，如果答案是"是"，直接回答"Yes"，如果答案是否，直接回答"No"。我的问题是："一周有7天吗"')

True

## 解析原始的LLM输出
模型的输出通常还包含一些额外信息（`metadata`）。如果解析器需要用到这部分信息，可以使用下面的方法：

In [None]:
from typing import List

from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage
from langchain_core.output_parsers import BaseGenerationOutputParser
from langchain_core.outputs import ChatGeneration, Generation


class StrInvertCase(BaseGenerationOutputParser[str]):
    """Demonstration parser that swaps the letter case of a chat message.

    It works on the raw generation objects rather than on plain text, and is
    kept deliberately minimal because it exists only as an example.
    """

    def parse_result(self, result: List[Generation], *, partial: bool = False) -> str:
        """Return the case-swapped content of the single chat generation.

        Args:
            result: Candidate generations for one model input. Exactly one
                element is accepted.
            partial: Whether partial (streaming) results are allowed. This
                implementation does not read it.

        Returns:
            The generation's message content with letter case inverted.

        Raises:
            NotImplementedError: If ``result`` does not hold exactly one item.
            OutputParserException: If the item is not a ``ChatGeneration``.
        """
        if len(result) != 1:
            raise NotImplementedError(
                "This output parser can only be used with a single generation."
            )
        only_candidate = result[0]
        if isinstance(only_candidate, ChatGeneration):
            return only_candidate.message.content.swapcase()
        # Anything other than a chat generation is rejected.
        raise OutputParserException(
            "This output parser can only be used with a chat generation."
        )

In [None]:
# Pipe the model into the generation-level parser and run it on a yes/no prompt;
# the cell's value is the reply text with its letter case swapped.
chain = chat | StrInvertCase()
chain.invoke('回答我的问题，如果答案是"是"，直接回答"Yes"，如果答案是否，直接回答"No"。我的问题是："一周有7天吗"')

# 类命名实体识别（NER）技术

In [5]:
# Define your desired data structure.
class Joke(BaseModel):
    # Schema for a two-part joke: `setup` is the question, `punchline` the answer.
    # (Kept as a comment rather than a class docstring so the generated JSON
    # schema, and therefore the prompt, is unchanged.)
    setup: str = Field(description="开启一个笑话的问题")
    punchline: str = Field(description="解答笑话的答案")

    # You can add custom validation logic easily with Pydantic.
    # NOTE(review): the validator is named `test` here, unlike the earlier Joke
    # cell's `question_ends_with_question_mark` — presumably to avoid clashing
    # with that definition; confirm before renaming.
    @validator("setup")
    def test(cls, field):
        """Require ``setup`` to end with the full-width question mark "？".

        Args:
            field: The candidate value for ``setup``.

        Returns:
            The value unchanged when it is well formed.

        Raises:
            ValueError: If the value is empty or does not end with "？".
        """
        # str.endswith is False for "", so an empty setup is reported as a
        # ValueError (a normal validation failure) instead of the IndexError
        # that indexing field[-1] would raise.
        if not field.endswith("？"):
            raise ValueError("Badly formed question!")
        return field


# Build a parser for the Joke schema. Its format instructions are pre-filled into
# the prompt as a partial variable, so callers only need to supply `query`.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="根据用户的输入进行解答.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Run the model and the parser as two separate steps: first capture the model's
# raw reply in `output`, then hand that reply to the parser explicitly.
prompt_and_model = prompt | chat
output = prompt_and_model.invoke({"query": "讲一个笑话"})
parser.invoke(output)

Joke(setup='为什么电脑永远不会感冒？', punchline='因为它有Windows（Windows，意为窗户，这里指电脑不会打开通风口）！')

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Create a comma-separated-list parser and store its format instructions in
# `format_instructions`.
output_parser = CommaSeparatedListOutputParser()

format_instructions = output_parser.get_format_instructions()