# Message

In [1]:
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage,ToolMessage, ToolCall
# from langchain_core.utils.utils import ensure_id
from typing import Any, Mapping, List

In [2]:
def _convert_hf_message_to_lc_message(_dict: Mapping[str, Any]) -> BaseMessage:
  """
  Build the LangChain message that corresponds to one Hugging Face chat dict.

  The dict must provide a "role" and a "content" entry. "system" and
  "developer" both become a SystemMessage, "user" becomes a HumanMessage and
  "assistant" becomes an AIMessage. Any other role raises ValueError.
  """
  role, content = _dict["role"], _dict["content"]

  # "developer" is the system-level role used by models such as GPT-OSS
  if role in ("system", "developer"):
    return SystemMessage(content=content)
  if role == "user":
    return HumanMessage(content=content)
  if role == "assistant":
    return AIMessage(content=content)

  raise ValueError(f"Invalid role: {role}")

def convert_hf_message_to_lc_message(messages: List[Mapping[str, Any]]) -> List[BaseMessage]:
  """
  Translate a Hugging Face style conversation into LangChain messages.

  Each dict is converted with `_convert_hf_message_to_lc_message`; the order of
  the input is preserved in the returned list.
  """
  return [_convert_hf_message_to_lc_message(hf_message) for hf_message in messages]

def convert_lc_message_to_hf_messages(messages: List[BaseMessage]) -> List[dict[str, Any]]:
  """
  Convert messages in Langchain style to huggingface style.

  Every returned item is a plain ``{"role": ..., "content": ...}`` dict.
  The mapping is driven by ``message.type``:

    * "system" -> role "system", content copied unchanged.
    * "human"  -> role "user", content copied unchanged.
    * "tool"   -> role "assistant"; the content is a sentence that embeds the
                  message's tool_call_id, status and content.
    * "ai"     -> role "assistant". When ``message.tool_calls`` is non-empty,
                  one entry is emitted per tool call (id, name and args
                  rendered as text) and the message content itself is not
                  emitted; otherwise the content is copied unchanged.

  Raises:
    ValueError: if a message has any other type.
  """
  chat_messages: List[dict[str, Any]] = []

  for message in messages:
    _type = message.type

    if _type == "system":
      chat_messages.append({"role" : "system", "content" : message.content})
    elif _type == "human":
      chat_messages.append({"role" : "user", "content" : message.content})
    elif _type == "tool":
      # Tool results are replayed as assistant text so that chat templates
      # without a dedicated tool role can still render them.
      tool_call_id = message.tool_call_id
      status = message.status
      content = f"Respone of tool_call_id {tool_call_id} with status {status} and content : {message.content}"
      chat_messages.append({"role" : "assistant", "content" : content})
    elif _type == "ai":
      tool_calls = message.tool_calls
      if tool_calls:
        # One assistant entry per requested tool call
        for tool_call in tool_calls:
          name = tool_call["name"]
          args = tool_call["args"]
          call_id = tool_call["id"]
          content = f"Tool Calling:\n tool_call_id : {call_id}\n name : {name}\n args : {args}"
          chat_messages.append({"role" : "assistant", "content" : content})
      else:
        chat_messages.append({"role" : "assistant", "content" : message.content})
    else:
      raise ValueError(f"Invalid message type: {_type}")

  return chat_messages

# Tool

In [3]:
def create_tool_call(
    name: str,
    args: dict[str, Any],
    *,
    id: str | None = None,
    index: int | str | None = None,
    **kwargs: Any,
) -> ToolCall:
    """Create a `ToolCall`.

    Args:
        name: The name of the tool to be called.
        args: The arguments to the tool call.
        id: An identifier for the tool call.

            Generated automatically if not provided (or empty).
        index: Index of block in aggregate response.

            Used during streaming. Only stored when it is not `None`.
        **kwargs: Extra values; those that are not `None` are stored under
            the `"extras"` key.

    Returns:
        A properly formatted `ToolCall`.

    !!! note

        The `id` is generated automatically if not provided, using a UUID4 format
        prefixed with `'lc_'` to indicate it is a LangChain-generated ID.
    """
    # The ID is generated here with the standard library: the `ensure_id`
    # helper is not imported in this notebook, so calling it raised NameError.
    from uuid import uuid4

    block = ToolCall(
        type="tool_call",
        name=name,
        args=args,
        id=id or f"lc_{uuid4()}",
    )

    if index is not None:
        block["index"] = index

    extras = {k: v for k, v in kwargs.items() if v is not None}
    if extras:
        block["extras"] = extras

    return block


# Model

In [4]:
import json
import re
from typing import Any, Dict, List, Optional, Union, Callable
from abc import ABC, abstractmethod
import inspect
from dataclasses import dataclass

from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizer, PreTrainedModel,BitsAndBytesConfig
import torch
from pydantic import BaseModel

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage,ToolMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.tools import BaseTool, tool
from langchain_core.utils.function_calling import convert_to_openai_tool
from collections.abc import Callable, Mapping, Sequence
from langchain_core.language_models import (
    LanguageModelInput
)
from langchain_core.runnables import Runnable


2026-02-12 18:28:18.305808: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770920898.328380   14018 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770920898.335390   14018 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770920898.352646   14018 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770920898.352664   14018 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770920898.352667   14018 computation_placer.cc:177] computation placer alr

In [5]:
class TransformerChatModel(BaseChatModel):
  """
  LangChain chat model that runs a Hugging Face causal LM locally.

  Whichever of `model` / `tokenizer` is not supplied to the constructor is
  loaded from `pretrained_model_name_or_path`. Tools attached with
  `bind_tools` are described to the model through an extra system message;
  the raw generated text is returned as the content of an AIMessage.
  """

  # for creating model
  pretrained_model_name_or_path: str
  device : str = "auto"  # forwarded to `from_pretrained` as `device_map`
  attn_implementation : str|None = None
  quantization_config : Any = None
  torch_dtype : Any = "auto"  # "float16" | "float32" | "bfloat16" | "auto" | torch.dtype
  hf_token : str|None = None # some model need authorized
  # tools (stored in OpenAI tool format, see `bind_tools`)
  bound_tools : List[Any] = []

  # for generation
  max_new_tokens: int = 512
  temperature: float = 0.7
  do_sample: bool = True
  additional_generation_kwargs: dict[str, Any] = {}  # merged last into the generate() kwargs

  # other
  model: Any = None
  tokenizer : Any = None
  max_context_length : int | None = None  # resolved in __init__ when left as None


  def __init__(
      self,
      *,
      model = None,
      tokenizer = None,
      **kwargs: Any
  ):
    """
    Validate the declared fields, then attach (or load) model and tokenizer.

    Args:
      model: An already loaded causal LM. Loaded from
        `pretrained_model_name_or_path` when omitted.
      tokenizer: An already loaded tokenizer. Loaded from
        `pretrained_model_name_or_path` when omitted.
      **kwargs: Values for the fields declared on the class.

    Raises:
      ValueError: if the model has to be loaded and `torch_dtype` is neither
        a supported string nor a `torch.dtype`.
    """
    # initial pydantic
    super().__init__(**kwargs)

    # Explicit None checks: relying on truthiness of model/tokenizer objects
    # is fragile, and a supplied object must never be silently replaced.
    if model is None:
      torch_dtype = self.torch_dtype
      if isinstance(torch_dtype, str):
        named_dtypes = {
            "float16" : torch.float16,
            "float32" : torch.float32,
            "bfloat16" : torch.bfloat16,
            "auto" : "auto",
        }
        if torch_dtype not in named_dtypes:
          raise ValueError(f"Invalid torch_dtype: {torch_dtype}")
        torch_dtype = named_dtypes[torch_dtype]
      elif not isinstance(torch_dtype, torch.dtype):
        raise ValueError(f"Invalid torch_dtype: {torch_dtype}")

      load_model_config = {
          "pretrained_model_name_or_path" : self.pretrained_model_name_or_path,
          "torch_dtype" : torch_dtype,
          "device_map" : self.device,
          "attn_implementation" : self.attn_implementation,
          "quantization_config" : self.quantization_config,
          "token" : self.hf_token
      }
      print("Loading Model from hugginface...\n")
      model = AutoModelForCausalLM.from_pretrained(**load_model_config)

    if tokenizer is None:
      # The token is needed here as well, otherwise gated/private
      # repositories fail while loading the tokenizer.
      tokenizer = AutoTokenizer.from_pretrained(
          self.pretrained_model_name_or_path,
          token=self.hf_token,
      )

    self.model = model
    self.tokenizer = tokenizer

    # Set pad token if not exists
    if self.tokenizer.pad_token is None:
        self.tokenizer.pad_token = self.tokenizer.eos_token

    # set max_context_length: prefer the model's own limit, fall back to 1024
    if self.max_context_length is None:
      model_config = getattr(self.model, "config", None)
      self.max_context_length = getattr(model_config, "max_position_embeddings", None) or 1024

  @property
  def _generation_config(self) -> dict:
    """Keyword arguments passed to `model.generate`; the additional kwargs override the defaults."""
    generation_config = {
        "max_new_tokens": self.max_new_tokens,
        "temperature": self.temperature,
        "do_sample": self.do_sample,
        "pad_token_id": self.tokenizer.pad_token_id,
        **self.additional_generation_kwargs
    }
    return generation_config

  def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        stream: bool | None = None,  # noqa: FBT001
        **kwargs: Any,
    ) -> ChatResult:
    """
    Generate one assistant reply for `messages`.

    When tools are bound, their description is prepended as a system message.
    The conversation is rendered with the tokenizer's chat template, and only
    the newly generated tokens are decoded.

    Args:
      messages: The conversation so far.
      stop: Optional stop sequences; the reply is cut at the first occurrence
        of any of them.
      run_manager, stream, **kwargs: Accepted for interface compatibility and
        not used.

    Returns:
      A ChatResult holding a single AIMessage with the generated text.
    """
    tool_instruction = self.get_tools_desc()
    if tool_instruction:
      messages = [SystemMessage(content=tool_instruction)] + messages
    # convert messages to prompt
    hf_msg = convert_lc_message_to_hf_messages(messages)
    # tokenize
    inputs = self.tokenizer.apply_chat_template(
        hf_msg,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        truncation=True,
        max_length=self.max_context_length,
        return_dict = True
    ).to(self.model.device)

    generation_config = self._generation_config
    with torch.inference_mode():
      outputs = self.model.generate(
        **inputs,
        **generation_config
      )
    # Decode response: skip the prompt tokens, keep only what was generated
    response = self.tokenizer.decode(
      outputs[0][inputs['input_ids'].shape[1]:],
      skip_special_tokens=True
    ).strip()

    # Honour stop sequences by cutting at the earliest match
    if stop:
      cut_positions = [
          position
          for position in (response.find(stop_text) for stop_text in stop if stop_text)
          if position != -1
      ]
      if cut_positions:
        response = response[:min(cut_positions)]

    msg = AIMessage(content = response)
    # TODO: post-process the response to extract tool calls here
    return ChatResult(generations=[ChatGeneration(message=msg)])



  def get_tools_desc(self) -> str:
    """
    Return the system prompt that describes every bound tool.

    The bound tools must be in OpenAI format. Each one is rendered as:

    **name**:
        docstring
        params:
          param1 (type) -- description extra_key: extra_value
          param2 (type)

    The descriptions are followed by instructions telling the model to request
    tools through a JSON code block. An empty string is returned when no tool
    is bound.
    """

    tools_desc = ""
    for bound_tool in self.bound_tools:
      tool_info = bound_tool["function"]
      tool_name = tool_info["name"]
      tool_desc = tool_info.get("description", "")
      if tool_desc:
        tool_desc = "\n\t"+tool_desc
      params_desc = []
      for param_name, param_info in tool_info["parameters"].get("properties",{}).items():
        param_type = param_info.get("type", "any")
        param_desc = param_info.get("description", "")
        # every schema key other than type/description is appended as "key: value"
        additional_info = ";".join([f"{k}: {v}" for k,v in param_info.items() if k != "type" and k != "description"])
        if param_desc or additional_info:
          param_desc = f"\t\t{param_name} ({param_type}) -- {param_desc} {additional_info}"
        else:
          param_desc = f"\t\t{param_name} ({param_type})"
        params_desc.append(param_desc)

      if not params_desc:
        params_desc = "This tool don't have parameters"
      else:
        params_desc = "\n".join(params_desc)

      tool_desc = f"""**{tool_name}**:{tool_desc}\n\tparams:\n{params_desc}\n\n"""
      tools_desc += tool_desc

    if tools_desc:
      return f"""You have access to the following tools :

{tools_desc}

Whenever you need to use a tool, you MUST respond with a JSON code block in this exact format:
```json
{{
    "tool_calls": [
        {{
            "name": "tool_name",
            "arguments": {{"param1": "value1", "param2": "value2"}}
        }}
    ]
}}
```
Only use provided tools, do not invent new ones.
If you don't need to use any tools, respond normally without the JSON block.
Tool responses are provided with a tool_call_id.
Always match each responese to the corresponding tool call using this ID and use the matched results to produce the final answer.
A user query may contain multiple sub-questions.
Use tools only for sub-questions that require them, answer the rest directly, and merge all results into the final response.
Do not explain how you use tools.
"""
    return ""


  def bind_tools(
      self,
      tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
      *,
      tool_choice: dict | str | bool | None = None,
      **kwargs: Any,
  ) -> Runnable[LanguageModelInput, AIMessage]:
    """
    Return a new model instance that advertises `tools` in its prompt.

    The already loaded model and tokenizer are shared with the new instance,
    and every configuration field is carried over.

    Args:
      tools: Tools in any form accepted by `convert_to_openai_tool`.
      tool_choice: Accepted so the model can be used by agent constructors
        that pass it; ignored, every bound tool stays available.
      **kwargs: Ignored.

    Returns:
      A `TransformerChatModel` whose `bound_tools` holds the converted tools.
    """

    # convert tools to open_ai format
    formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
    model_with_tools = self.__class__(
        model = self.model,
        tokenizer = self.tokenizer,
        pretrained_model_name_or_path = self.pretrained_model_name_or_path,
        device = self.device,
        attn_implementation = self.attn_implementation,
        quantization_config = self.quantization_config,
        torch_dtype = self.torch_dtype,
        hf_token = self.hf_token,
        bound_tools = formatted_tools,
        max_new_tokens = self.max_new_tokens,
        temperature = self.temperature,
        do_sample = self.do_sample,
        max_context_length = self.max_context_length,
        # Passed as the field itself: splatting the dict into the constructor
        # would not populate `additional_generation_kwargs` on the copy.
        additional_generation_kwargs = dict(self.additional_generation_kwargs),
    )

    return model_with_tools


  @property
  def _llm_type(self) -> str:
    """Identifier of this chat model type."""
    return "transformer-chat-model"



In [6]:
# Checkpoint to load from the Hugging Face Hub
pretrained_model_name_or_path = "Qwen/Qwen2.5-3B-Instruct"

# Quantization config: 4-bit NF4 weights with bfloat16 compute
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# No model/tokenizer is passed, so both are loaded during construction
chat_model = TransformerChatModel(
    quantization_config=bnb_config,
    pretrained_model_name_or_path=pretrained_model_name_or_path,
)

Loading Model from hugginface...



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
@tool
def factorial(num: int) -> int:
  """Use to calculate the factorial of number or number!."""
  # The docstring above is kept verbatim: langchain's `@tool` uses it as the
  # tool description that is shown to the model.
  import math

  # math.factorial raises ValueError for a negative `num` instead of silently
  # returning 1 the way an empty multiplication loop does.
  return math.factorial(num)

In [8]:
# Derive a model instance whose prompt advertises the `factorial` tool
chat_with_tool = chat_model.bind_tools(tools=[factorial])

In [9]:
# Ask a question the factorial tool can answer, then display the raw reply
response = chat_with_tool.invoke("What is 5! ?")
response

AIMessage(content='```json\n{\n    "tool_calls": [\n        {\n            "name": "factorial",\n            "arguments": {"num": "5"}\n        }\n    ]\n}\n```', additional_kwargs={}, response_metadata={}, id='run--054db17c-c5b4-4733-8a12-85db4bc55b91-0')

In [17]:
# Pull the payload out of the ```json ... ``` fence instead of slicing by fixed
# offsets, which breaks as soon as the model emits any text around the fence.
fenced_payload = re.search(r"```(?:json)?\s*(.*?)```", response.content, re.DOTALL)
json.loads(fenced_payload.group(1) if fenced_payload else response.content)

{'tool_calls': [{'name': 'factorial', 'arguments': {'num': '5'}}]}