In [1]:
from langchain_community.chat_models import ChatTongyi

# Create the Tongyi chat model client with its default settings; the cells
# below reuse this `chat` object for the blocking, streaming and caching demos.
# NOTE(review): credentials are presumably picked up from the
# DASHSCOPE_API_KEY environment variable — confirm before running on a fresh
# kernel.
chat = ChatTongyi()

# 阻塞模式

In [2]:
from langchain_core.messages import HumanMessage, SystemMessage

# Prompt reused by the blocking and streaming examples: a system message that
# sets the persona ("you are a math expert") followed by the user's question
# ("what is the Pythagorean theorem").
messages = [
    SystemMessage(content="你是一个数学专家"),
    HumanMessage(content="什么是勾股定理"),
]

# Blocking call: the complete reply comes back as a single AIMessage. Being
# the last expression of the cell, it is displayed as the cell output.
chat.invoke(messages)

AIMessage(content='勾股定理是古希腊数学家毕达哥拉斯发现的一个几何学基本定理，也被称为毕达哥拉斯定理。它描述了直角三角形中各边之间的关系：在一个直角三角形中，直角两边的平方和等于斜边的平方。用数学公式表示就是：\n\n如果直角三角形的两条直角边分别是a和b，斜边是c，那么有：\na² + b² = c²\n\n这个定理不仅适用于二维平面，也适用于三维空间中的直角坐标系中的点到原点的距离关系。它是许多几何和代数问题的基础，同时也是现代数学教育中的一个重要概念。', response_metadata={'model_name': 'qwen-turbo', 'finish_reason': 'stop', 'request_id': '09f736aa-84b9-9a31-b249-1cf1a779b034', 'token_usage': {'input_tokens': 22, 'output_tokens': 142, 'total_tokens': 164}}, id='run-0978a38d-3611-4c89-97e2-05280f578591-0')

# 流式模式

In [3]:
# Streaming call: print every chunk as it is yielded. The "|" terminator makes
# the chunk boundaries visible in the output, and flush=True pushes each piece
# to stdout immediately instead of waiting for the buffer to fill.
for chunk in chat.stream(messages):
    print(chunk.content, end="|", flush=True)

勾|股|定|理是古希腊数学|家毕达哥拉斯发现的一个几何|学基本定理，也被称为毕|达哥拉斯定理。它描述|了直角三角形中各边|之间的关系：在一个直角三角形|中，直角边（即与|直角相邻的两边）的平方|和等于斜边（即三角形|最长边，对直角）的|平方。用数学公式表示就是：

|如果直角三角形的两条直|角边长分别为a和b，|斜边长为c，那么有|：
a² + b² = c|²

这个定理不仅适用于二维|平面，其抽象的概念在三维空间|和其他高维空间中也有类似的表述|。它是解决涉及直角三角形|问题的关键工具，在数学、物理学、|工程学等多个领域都有广泛的应用。||

# 缓存

In [5]:
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache

In [11]:
%%time

# Register an in-memory cache as the global LLM cache. It stays active for the
# cells that run after this one. Note that this setup is included in the
# timing reported by %%time for this cell.
set_llm_cache(InMemoryCache())

# The first time, the prompt ("tell a joke") is not yet in the cache, so the
# call has to reach the model and should take longer.
chat.invoke("说一个笑话")

CPU times: user 61.1 ms, sys: 15.2 ms, total: 76.3 ms
Wall time: 818 ms


AIMessage(content='小王剪了一个中分，然后他就变成了小全。', response_metadata={'model_name': 'qwen-turbo', 'finish_reason': 'stop', 'request_id': '11862524-8349-94b5-9a9a-456be26f7336', 'token_usage': {'input_tokens': 11, 'output_tokens': 13, 'total_tokens': 24}}, id='run-764a7128-884a-45c8-8316-80f3d312a919-0')

In [12]:
%%time

# Same prompt as the previous cell: this time the answer is in the cache, so
# the call goes faster. The output below carries the same request_id and run
# id as the first call, i.e. the stored response is returned as-is.
chat.invoke("说一个笑话")

CPU times: user 827 µs, sys: 106 µs, total: 933 µs
Wall time: 982 µs


AIMessage(content='小王剪了一个中分，然后他就变成了小全。', response_metadata={'model_name': 'qwen-turbo', 'finish_reason': 'stop', 'request_id': '11862524-8349-94b5-9a9a-456be26f7336', 'token_usage': {'input_tokens': 11, 'output_tokens': 13, 'total_tokens': 24}}, id='run-764a7128-884a-45c8-8316-80f3d312a919-0')

# 自定义Chat model

In [14]:
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseChatModel, SimpleChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import run_in_executor


class CustomChatModelAdvanced(BaseChatModel):
    """A custom chat model that echoes the first `n` characters of the input.

    No real inference happens: the model takes the content of the last
    message of the prompt and returns its first ``n`` characters, either in
    one piece (``_generate``) or one character at a time (``_stream``).

    When contributing an implementation to LangChain, carefully document
    the model including the initialization parameters, include
    an example of how to initialize the model and include any relevant
    links to the underlying models documentation or API.

    Example:

        .. code-block:: python

            model = CustomChatModelAdvanced(n=2, model_name="my_custom_model")
            result = model.invoke([HumanMessage(content="hello")])
            result = model.batch([[HumanMessage(content="hello")],
                                 [HumanMessage(content="world")]])
    """

    model_name: str
    """The name of the model"""
    n: int
    """The number of characters from the last message of the prompt to be echoed."""

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Override the _generate method to implement the chat model logic.

        This can be a call to an API, a call to a local model, or any other
        implementation that generates a response to the input prompt.

        Args:
            messages: the prompt composed of a list of messages.
            stop: a list of strings on which the model should stop generating.
                  If generation stops due to a stop token, the stop token itself
                  SHOULD BE INCLUDED as part of the output. This is not enforced
                  across models right now, but it's a good practice to follow since
                  it makes it much easier to parse the output of the model
                  downstream and understand why generation stopped.
                  (Unused by this echo implementation.)
            run_manager: A run manager with callbacks for the LLM.
                  (Unused by this echo implementation.)

        Returns:
            A ChatResult holding a single generation whose message content is
            the first ``n`` characters of the last prompt message.
        """
        # Replace this with actual logic to generate a response from a list
        # of messages.
        last_message = messages[-1]
        tokens = last_message.content[: self.n]
        message = AIMessage(
            content=tokens,
            additional_kwargs={},  # Used to add additional payload (e.g., function calling request)
            response_metadata={  # Use for response metadata
                "time_in_seconds": 3,
            },
        )

        generation = ChatGeneration(message=message)
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream the output of the model.

        This method should be implemented if the model can generate output
        in a streaming fashion. If the model does not support streaming,
        do not implement it. In that case streaming requests will be automatically
        handled by the _generate method.

        Args:
            messages: the prompt composed of a list of messages.
            stop: a list of strings on which the model should stop generating.
                  If generation stops due to a stop token, the stop token itself
                  SHOULD BE INCLUDED as part of the output. This is not enforced
                  across models right now, but it's a good practice to follow since
                  it makes it much easier to parse the output of the model
                  downstream and understand why generation stopped.
                  (Unused by this echo implementation.)
            run_manager: A run manager with callbacks for the LLM.

        Yields:
            One ChatGenerationChunk per echoed character, followed by a final
            chunk with empty content that carries the response metadata.
        """
        last_message = messages[-1]
        tokens = last_message.content[: self.n]

        for token in tokens:
            chunk = ChatGenerationChunk(message=AIMessageChunk(content=token))

            if run_manager:
                # This is optional in newer versions of LangChain
                # The on_llm_new_token will be called automatically
                run_manager.on_llm_new_token(token, chunk=chunk)

            yield chunk

        # Let's add some other information (e.g., response metadata)
        # NOTE(review): the key here is "time_in_sec" while _generate uses
        # "time_in_seconds"; kept as-is to avoid changing emitted metadata.
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3})
        )
        if run_manager:
            # This is optional in newer versions of LangChain
            # The on_llm_new_token will be called automatically
            # Report the chunk's own (empty) text. Reusing the loop variable
            # `token` here would repeat the last character to callbacks and
            # would be unbound when nothing was echoed (empty input or n == 0).
            run_manager.on_llm_new_token("", chunk=chunk)
        yield chunk

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model."""
        return "echoing-chat-model-advanced"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters.

        This information is used by the LangChain callback system, which
        is used for tracing purposes make it possible to monitor LLMs.
        """
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per token pricing for their model and monitor
            # costs for the given LLM.)
            "model_name": self.model_name,
        }

In [15]:
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)

# Echo model configured to return the first 3 characters of the last message.
model = CustomChatModelAdvanced(n=3, model_name="my_custom_model")

# Only the final message ("Meow!") is read by the echo model; the earlier
# turns are ignored, hence the 'Meo' result shown in the output.
model.invoke(
    [
        HumanMessage(content="hello!"),
        AIMessage(content="Hi there human!"),
        HumanMessage(content="Meow!"),
    ]
)

AIMessage(content='Meo', response_metadata={'time_in_seconds': 3}, id='run-38fb6c01-bc91-4b3a-84b5-989a52a0a245-0')

In [17]:
# A plain string is also accepted as input; it is equivalent to
# `[HumanMessage(content="cat vs dog")]`.
# With n=3 the model streams "c", "a", "t" and then one empty chunk that only
# carries metadata, which is why the output ends with two "|" separators.
for chunk in model.stream("cat vs dog"):
    print(chunk.content, end="|")

c|a|t||

# 记录消耗tokens

In [2]:
from langchain_core.messages import HumanMessage, SystemMessage
from callbacks.manager import get_generic_llms_callback

# Same system + user prompt as in the blocking example ("you are a math
# expert" / "what is the Pythagorean theorem").
messages = [
        SystemMessage(content="你是一个数学专家"),
        HumanMessage(content="什么是勾股定理"),
    ]

# `get_generic_llms_callback` is a project-local helper (callbacks.manager).
# Judging by the printed summary, `cb` collects token counts, the number of
# successful requests and the cost of the calls made inside the `with` block
# — NOTE(review): confirm against the helper's implementation.
with get_generic_llms_callback() as cb:
    chat.invoke(messages)
    print(cb)

Tokens Used: 153
	Prompt Tokens: 22
	Completion Tokens: 131
Successful Requests: 1
Total Cost (CYN): ¥0.0027960000000000003


In [6]:
from tongyi.chat_model import CustomChatTongyi

# The DashScope API key can be passed in as the `dashscope_api_key` argument,
# or configured through the `DASHSCOPE_API_KEY` environment variable.
# NOTE(review): this rebinds `chat`, replacing the ChatTongyi instance created
# at the top of the notebook; earlier cells re-run after this one will use the
# custom model instead.
chat = CustomChatTongyi()

# Stream the reply while the usage callback is active, then print its summary.
# Presumably CustomChatTongyi exists so that token usage is also reported in
# streaming mode — TODO confirm in tongyi.chat_model.
with get_generic_llms_callback() as cb:
    for chunk in chat.stream(messages):
        print(chunk.content, end="|", flush=True)
    
    # Terminate the streamed line so the summary starts on its own line.
    print()
    print(cb)

勾|股|定|理是古希腊数学|家毕达哥拉斯发现的一个几何|学基本定理，也被称为毕|达哥拉斯定理。它描述|了直角三角形中各边|之间的关系：在一个直角三角形|中，直角边（即与|直角相邻的两边）的平方|和等于斜边（即三角形|最长边，对直角）的|平方。用数学公式表示就是：

|如果直角三角形的两条直|角边长分别为a和b，|斜边长为c，那么有|：
a² + b² = c|²

这个定理不仅适用于二维|平面，其抽象的概念在三维空间|和其他高维空间中也有类似的表述|。它是解决涉及直角三角形|问题的关键工具，在数学、物理学、|工程学等多个领域都有广泛的应用。|||
Tokens Used: 190
	Prompt Tokens: 22
	Completion Tokens: 168
Successful Requests: 1
Total Cost (CYN): ¥0.003536
