# Mistral Tokenizer

In [None]:
! pip install mistral-common

In [None]:
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
    ToolMessage,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.tool_calls import (
    Function,
    Tool,
    ToolCall,
    FunctionCall,
)
from mistral_common.protocol.instruct.request import ChatCompletionRequest

## Load Mistral tokenizer

We have released three versions of our tokenizer, each powering a different set of models:

- v1: open-mistral-7b, open-mixtral-8x7b, mistral-embed
- v2: mistral-small, mistral-large
- v3: open-mixtral-8x22b

Let's load v3:

In [None]:
# Instantiate the v3 tokenizer (the version listed above for open-mixtral-8x22b).
tokenizer_v3 = MistralTokenizer.v3()

# Alternatively, select the tokenizer by model name instead of by version:
# tokenizer_v3 = MistralTokenizer.from_model("open-mixtral-8x22b")

## Tokenize a list of messages

In [None]:
# Tool schema offered to the model: a single weather-lookup function that
# requires a location string and a temperature unit.
weather_tool = Tool(
    function=Function(
        name="get_current_weather",
        description="Get the current weather",
        parameters={
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
            },
            "required": ["location", "format"],
        },
    )
)

# Conversation history, in order.
conversation = [
    # Turn 1: question -> tool call -> tool result -> final answer.
    UserMessage(content="What's the weather like today in Paris"),
    AssistantMessage(
        content=None,
        tool_calls=[
            ToolCall(
                id="VvvODy9mT",
                function=FunctionCall(
                    name="get_current_weather",
                    arguments='{"location": "Paris, France", "format": "celsius"}',
                ),
            )
        ],
    ),
    # tool_call_id ties this result back to the ToolCall with the same id.
    ToolMessage(
        tool_call_id="VvvODy9mT",
        name="get_current_weather",
        content="22",
    ),
    AssistantMessage(
        content="The current temperature in Paris, France is 22 degrees Celsius.",
    ),
    # Turn 2: question -> tool call -> tool result (history ends here).
    UserMessage(content="What's the weather like today in San Francisco"),
    AssistantMessage(
        content=None,
        tool_calls=[
            ToolCall(
                id="fAnpW3TEV",
                function=FunctionCall(
                    name="get_current_weather",
                    arguments='{"location": "San Francisco", "format": "celsius"}',
                ),
            )
        ],
    ),
    ToolMessage(
        tool_call_id="fAnpW3TEV",
        name="get_current_weather",
        content="20",
    ),
]

# Bundle tools and history into one request, then encode it with the v3 tokenizer.
chat_request = ChatCompletionRequest(
    tools=[weather_tool],
    messages=conversation,
    model="test",
)
tokenized = tokenizer_v3.encode_chat_completion(chat_request)

In [None]:
# Keep both views of the encoded request: its tokens and its text form.
tokens = tokenized.tokens
text = tokenized.text

In [None]:
# Display the text form that the tokenizer produced for the whole request.
text

'<s>[INST] What\'s the weather like today in Paris[/INST][TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Paris, France", "format": "celsius"}, "id": "VvvODy9mT"}]</s>[TOOL_RESULTS] {"call_id": "VvvODy9mT", "content": 22}[/TOOL_RESULTS] The current temperature in Paris, France is 22 degrees Celsius.</s>[AVAILABLE_TOOLS] [{"type": "function", "function": {"name": "get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location."}}, "required": ["location", "format"]}}}][/AVAILABLE_TOOLS][INST] What\'s the weather like today in San Francisco[/INST][TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "San Francisco", "format": "celsius"}, "id": "fAnpW3TEV"}]</s>[TOOL

In [None]:
# Count the tokens in the encoded request.
len(tokens)

302