In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Getting Started with Mistral AI Models
<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/mistralai_intro.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fgenerative_ai%2Fmistralai_intro.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">                                                                             
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/generative_ai/mistralai_intro.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/mistralai_intro.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  
</table>

## Overview

### Mistral AI on Vertex AI

Mistral AI models on Vertex AI are offered as fully managed, serverless APIs. To use a Mistral AI model on Vertex AI, send a request directly to the Vertex AI API endpoint.

You can stream your Mistral AI model responses to reduce the end-user latency perception. A streamed response uses server-sent events (SSE) to incrementally stream the response.

Learn more about [Vertex AI](https://cloud.google.com/vertex-ai).

### Available Mistral AI models

*   ### Mistral Large (2407)
Complex tasks that require large reasoning capabilities or are highly specialized (synthetic text generation, code generation, RAG, or agents). [Blog Post](https://mistral.ai/news/mistral-large-2407/)

*   ### Mistral Nemo
Reasoning, world knowledge, and coding performance are state-of-the-art in its size category.

*   ### Codestral
Coding-specific tasks to enhance developer productivity with code completion and fill-in-the-middle capabilities.


## Objective

This notebook shows how to call the Mistral Large, Mistral Nemo, and Codestral models on Vertex AI, first through the **Vertex AI API** and then through Mistral AI's SDK for Python.

For more information, see [Mistral's documentation](https://docs.mistral.ai/) and [Mistral's models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral) on Google Cloud.

- Mistral on Model Garden supports the same API calls as Mistral’s own API endpoints, except for the `safe_prompt` parameter that will return an error if specified in the input. So do not include `safe_prompt` in input requests.
- Documentation links
  - [Mistral APIs](https://docs.mistral.ai/api/)
  - [Chat Completion](https://docs.mistral.ai/api/#operation/createChatCompletion) operations supported by Mistral Large, Mistral Nemo and Codestral
  - [Fill-in-the-middle](https://docs.mistral.ai/api/#operation/createFIMCompletion) operations supported by Codestral

## Vertex AI API

## Get Started - Required first steps


### Authenticate your notebook environment (Colab only)


In [None]:
import sys

# Colab-only step: the branch runs only when the `google.colab` module is
# already loaded in this interpreter; elsewhere the cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    # Authenticate the notebook user (see the section heading above).
    auth.authenticate_user()

#### Select one of Mistral AI models

In [None]:
MODEL = "mistral-large"  # @param ["mistral-large", "mistral-nemo", "codestral"]

# Regions and versions offered for each model in this section, stored as
# (regions, versions). A later cell maps the "latest" version to the
# unversioned model name.
_MODEL_OPTIONS = {
    "mistral-large": (["europe-west4", "us-central1"], ["latest", "2407"]),
    "mistral-nemo": (["europe-west4", "us-central1"], ["latest", "2407"]),
    "codestral": (["europe-west4", "us-central1"], ["latest", "2405"]),
}

# Fail fast on an unknown model instead of leaving `available_regions` and
# `available_versions` undefined (or stale from an earlier run of this cell).
if MODEL not in _MODEL_OPTIONS:
    raise ValueError(
        f"Unsupported MODEL {MODEL!r}; choose one of {sorted(_MODEL_OPTIONS)}"
    )
available_regions, available_versions = _MODEL_OPTIONS[MODEL]

#### Select a location and a version from the dropdown

In [None]:
import ipywidgets as widgets
from IPython.display import display

def _labelled_dropdown(options, description):
    """Build a Dropdown over `options` with the given description text.

    Both selectors in this cell share the same keyword arguments, so they are
    set once here.
    """
    return widgets.Dropdown(
        options=options,
        description=description,
        font_weight="bold",
        style={"description_width": "initial"},
    )


# Selector for the region the requests are sent to.
dropdown_loc = _labelled_dropdown(available_regions, "Select a location:")

# Selector for the model version.
dropdown_ver = _labelled_dropdown(
    available_versions, "Select the model version (optional):"
)


def dropdown_loc_eventhandler(change):
    """Store the newly selected location in the global LOCATION and echo it.

    Args:
        change: Change notification passed by the widget; read both by key
            ("type", "name") and by attribute (`new`).
    """
    global LOCATION
    # Only react to changes of the widget's `value`.
    if change["type"] != "change" or change["name"] != "value":
        return
    selected = change.new
    LOCATION = selected
    print("Selected:", selected)


def dropdown_ver_eventhandler(change):
    """Store the newly selected version in the global MODEL_VERSION and echo it.

    Args:
        change: Change notification passed by the widget; read both by key
            ("type", "name") and by attribute (`new`).
    """
    global MODEL_VERSION
    # Only react to changes of the widget's `value`.
    if change["type"] != "change" or change["name"] != "value":
        return
    selected = change.new
    MODEL_VERSION = selected
    print("Selected:", selected)


# Seed LOCATION with the dropdown's current value so it is defined even if the
# selection is never changed; the observer updates it on later changes.
LOCATION = dropdown_loc.value
dropdown_loc.observe(dropdown_loc_eventhandler, names="value")
display(dropdown_loc)

# Same pattern for the model version.
MODEL_VERSION = dropdown_ver.value
dropdown_ver.observe(dropdown_ver_eventhandler, names="value")
display(dropdown_ver)

#### Set Google Cloud project and model information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Fill in your Google Cloud project ID before running this cell.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Regional endpoint, built from LOCATION as it is when this cell runs; re-run
# the cell after changing the location dropdown.
ENDPOINT = f"https://{LOCATION}-aiplatform.googleapis.com"
# "latest" maps to an empty suffix (unversioned model name); any other version
# becomes "@<version>", which request URLs append to the model name.
SELECTED_MODEL_VERSION = "" if MODEL_VERSION == "latest" else f"@{MODEL_VERSION}"

# Stop early when the project ID is empty or still the placeholder.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    raise ValueError("Please set your PROJECT_ID")

#### Import required libraries

In [None]:
import json
import subprocess

import requests

### Sample Requests

#### Text generation

##### Unary call

Sends a POST request to the specified API endpoint to get a response from the model using the provided payload.

In [None]:
# Chat-completion request body; "stream": False asks for a single (unary)
# response and "max_tokens" caps the length of the answer.
PAYLOAD = {
    "model": MODEL,
    "messages": [{"role": "user", "content": "who is the best French painter?"}],
    "max_tokens": 100,
    "stream": False,
}

# Serialize the payload so it can be interpolated into the shell command below.
request = json.dumps(PAYLOAD)
# POST the payload to the model's rawPredict method, using a gcloud access token as bearer token.
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}{SELECTED_MODEL_VERSION}:rawPredict -d '{request}'

With a pretty response

In [None]:
# Get the access token
process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
# Without this check a failed gcloud call yields an empty token, and the
# request below fails with an authentication error that hides the real cause.
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline

# Define query headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Accept": "application/json",
}

# Request URL (rawPredict method of the selected model) and chat-completion body
url = f"{ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}{SELECTED_MODEL_VERSION}:rawPredict"
data = {
    "model": MODEL,
    "messages": [{"role": "user", "content": "who is the best French painter?"}],
    "stream": False,
}

# Make the POST request. requests applies no timeout by default, so set one to
# keep the cell from hanging indefinitely if the endpoint never answers.
response = requests.post(url, headers=headers, json=data, timeout=300)

# Check status code and try to parse the response as JSON
if response.status_code == 200:
    try:
        response_dict = response.json()
        print(response_dict["choices"][0]["message"]["content"])
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        print("Raw response:", response.text)  # Print raw response if parsing fails
    except (KeyError, IndexError, TypeError) as e:
        # Valid JSON that does not have the expected choices/message layout.
        print("Unexpected response structure:", repr(e))
        print("Raw response:", response.text)
else:
    print(f"Request failed with status code: {response.status_code}")
    print("Raw response:", response.text)  # The body usually explains the failure

##### Streaming call

Sends a POST request to the specified API endpoint to stream a response from the model using the provided payload.

In [None]:
# Same chat request as the unary example, but "stream": True asks for the
# answer to be streamed.
PAYLOAD = {
    "model": MODEL,
    "messages": [{"role": "user", "content": "who is the best French painter?"}],
    "max_tokens": 100,
    "stream": True,
}

# Serialize the payload so it can be interpolated into the shell command below.
request = json.dumps(PAYLOAD)
# Streaming requests go to the streamRawPredict method instead of rawPredict.
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}{SELECTED_MODEL_VERSION}:streamRawPredict -d '{request}'

#### Code generation

Mistral Large, Mistral Nemo and Codestral support code generation with the Chat Completion operations covered above.

With Codestral, you can also do Fill-in-the-middle operations.

##### Fill-in-the-middle (FIM)
With this feature, users can define the starting point of the code using a `prompt`, and the ending point of the code using an optional `suffix` and an optional `stop`.

The Codestral model will then generate the code that fits in between, making it ideal for tasks that require a specific piece of code to be generated.

More information on FIM:
- [Mistral API Documentation FIM](https://docs.mistral.ai/api/#operation/createFIMCompletion)
- [Mistral FIM Documentation](https://docs.mistral.ai/capabilities/code_generation/#fill-in-the-middle-endpoint)

Example 1

In [None]:
# Fill-in-the-middle is demonstrated with Codestral: this overrides the model
# selected earlier and clears the version suffix for the cells that follow.
MODEL = "codestral"
SELECTED_MODEL_VERSION = ""

# FIM request body: `prompt` is the code before the gap, `suffix` the code after it.
PAYLOAD = {
    "model": MODEL,
    "prompt": "def say_hello(name: str) -> str",
    "suffix": "return n_words",
}

# Serialize the payload so it can be interpolated into the shell command below.
request = json.dumps(PAYLOAD)
# NOTE(review): the URL uses streamRawPredict although the payload sets no "stream" flag — confirm this is intended.
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}{SELECTED_MODEL_VERSION}:streamRawPredict -d '{request}'

Example 2 with pretty response

In [None]:
# Fill-in-the-middle is demonstrated with Codestral; this overrides the model
# selected earlier.
MODEL = "codestral"

# Get the access token
process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
# Without this check a failed gcloud call yields an empty token, and the
# request below fails with an authentication error that hides the real cause.
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline

# Define query headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Accept": "application/json",
}

# Request URL (rawPredict on the unversioned model) and FIM body: `prompt` is
# the code before the gap, `suffix` the code after it.
url = f"{ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}:rawPredict"
data = {
    "model": MODEL,
    "prompt": "def f(",
    "suffix": "return a + b",
    "max_tokens": 64,
    "temperature": 0,
}

# Make the POST request. requests applies no timeout by default, so set one to
# keep the cell from hanging indefinitely if the endpoint never answers.
response = requests.post(url, headers=headers, json=data, timeout=300)

# Check status code and try to parse the response as JSON
if response.status_code == 200:
    try:
        response_dict = response.json()
        print(response_dict["choices"][0]["message"]["content"])
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        print("Raw response:", response.text)  # Print raw response if parsing fails
    except (KeyError, IndexError, TypeError) as e:
        # Valid JSON that does not have the expected choices/message layout.
        print("Unexpected response structure:", repr(e))
        print("Raw response:", response.text)
else:
    print(f"Request failed with status code: {response.status_code}")
    print("Raw response:", response.text)  # The body usually explains the failure

## Using Mistral AI's Vertex SDK for *Python*

## Get Started

### Install Mistral's Vertex SDK for Python and other required packages

In [None]:
# Install or upgrade (-U), quietly (-q), Mistral's SDK with its `gcp` extra at version 1.0.3 or newer, plus httpx.
! pip3 install -U -q 'mistralai[gcp]>=1.0.3'
! pip3 install -U -q httpx

In [None]:
# Get the access token
import subprocess

process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
# Without this check a failed gcloud call yields an empty token, and later
# requests fail with an authentication error that hides the real cause.
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline

# Headers for direct REST calls made later in the notebook.
# NOTE: on Colab the restart cell that follows discards these variables.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
# Restart kernel after installs so that your environment can access the new packages
import sys

# Only Colab is restarted automatically; elsewhere this cell does nothing.
if "google.colab" in sys.modules:
    import IPython

    app = IPython.Application.instance()
    # Passing True asks for a restart rather than a plain shutdown.
    app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.

In [None]:
import sys

# Colab-only step: the branch runs only when the `google.colab` module is
# already loaded in this interpreter; elsewhere the cell does nothing.
if "google.colab" in sys.modules:

    from google.colab import auth

    # Authenticate the notebook user (see the section heading above).
    auth.authenticate_user()

#### Select one of Mistral AI models

In [None]:
MODEL = "mistral-large"  # @param ["mistral-large", "mistral-nemo", "codestral"]

# Regions and versions offered for each model in this section, stored as
# (regions, versions). The SDK calls below build the model id as
# "<model>-<version>", so only explicit versions are listed.
_MODEL_OPTIONS = {
    "mistral-large": (["europe-west4", "us-central1"], ["2407"]),
    "mistral-nemo": (["europe-west4", "us-central1"], ["2407"]),
    "codestral": (["europe-west4", "us-central1"], ["2405"]),
}

# Fail fast on an unknown model instead of leaving `available_regions` and
# `available_versions` undefined (or stale from an earlier run of this cell).
if MODEL not in _MODEL_OPTIONS:
    raise ValueError(
        f"Unsupported MODEL {MODEL!r}; choose one of {sorted(_MODEL_OPTIONS)}"
    )
available_regions, available_versions = _MODEL_OPTIONS[MODEL]

#### Select a location and a version from the dropdown

In [None]:
import ipywidgets as widgets
from IPython.display import display

def _labelled_dropdown(options, description):
    """Build a Dropdown over `options` with the given description text.

    Both selectors in this cell share the same keyword arguments, so they are
    set once here.
    """
    return widgets.Dropdown(
        options=options,
        description=description,
        font_weight="bold",
        style={"description_width": "initial"},
    )


# Selector for the region the requests are sent to.
dropdown_loc = _labelled_dropdown(available_regions, "Select a location:")

# Selector for the model version.
dropdown_ver = _labelled_dropdown(
    available_versions, "Select the model version (optional):"
)


def dropdown_loc_eventhandler(change):
    """Store the newly selected location in the global LOCATION and echo it.

    Args:
        change: Change notification passed by the widget; read both by key
            ("type", "name") and by attribute (`new`).
    """
    global LOCATION
    # Only react to changes of the widget's `value`.
    if change["type"] != "change" or change["name"] != "value":
        return
    selected = change.new
    LOCATION = selected
    print("Selected:", selected)


def dropdown_ver_eventhandler(change):
    """Store the newly selected version in the global MODEL_VERSION and echo it.

    Args:
        change: Change notification passed by the widget; read both by key
            ("type", "name") and by attribute (`new`).
    """
    global MODEL_VERSION
    # Only react to changes of the widget's `value`.
    if change["type"] != "change" or change["name"] != "value":
        return
    selected = change.new
    MODEL_VERSION = selected
    print("Selected:", selected)


# Seed LOCATION with the dropdown's current value so it is defined even if the
# selection is never changed; the observer updates it on later changes.
LOCATION = dropdown_loc.value
dropdown_loc.observe(dropdown_loc_eventhandler, names="value")
display(dropdown_loc)

# Same pattern for the model version.
MODEL_VERSION = dropdown_ver.value
dropdown_ver.observe(dropdown_ver_eventhandler, names="value")
display(dropdown_ver)

#### Set Google Cloud project and model information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Fill in your Google Cloud project ID before running this cell.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Regional endpoint, built from LOCATION as it is when this cell runs; re-run
# the cell after changing the location dropdown.
ENDPOINT = f"https://{LOCATION}-aiplatform.googleapis.com"

# Stop early when the project ID is empty or still the placeholder.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    raise ValueError("Please set your PROJECT_ID")

#### Import required libraries

In [None]:
import json

import requests

### Sample Requests

#### Text generation

##### Unary call

Initializes a Mistral AI client for Vertex AI, sends a request to generate content, and prints the text of the response.

In [None]:
import subprocess

from mistralai_gcp import MistralGoogleCloud

# Fetch the access token in this cell: on Colab the kernel restart that follows
# the package installation discards the token obtained before it, which would
# otherwise leave `access_token` undefined here.
process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline

# Client bound to the selected region and project.
client = MistralGoogleCloud(
    access_token=access_token, region=LOCATION, project_id=PROJECT_ID
)

try:
    # The model id is built as "<model>-<version>" from the selections above.
    resp = client.chat.complete(
        model=f"{MODEL}-{MODEL_VERSION}",
        messages=[
            {
                "role": "user",
                "content": "Who is the best French painter? Answer in one short sentence.",
            }
        ],
    )
    print(resp.choices[0].message.content)

# Any failure is reported as a message rather than a traceback.
except Exception as e:
    print(f"An error occurred: {e}")

##### Streaming call

Initializes a Mistral AI client for Vertex AI, sends a streaming request to generate content, and prints the text as it is received.

In [None]:
from mistralai_gcp import MistralGoogleCloud

# Client bound to the selected region and project.
client = MistralGoogleCloud(
    access_token=access_token, region=LOCATION, project_id=PROJECT_ID
)

try:
    # The model id is built as "<model>-<version>" from the selections above.
    stream = client.chat.stream(
        model=f"{MODEL}-{MODEL_VERSION}",
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": "Who is the best French painter? Answer in one short sentence.",
            }
        ],
    )

    for chunk in stream:
        piece = chunk.data.choices[0].delta.content
        # print() appends a newline by default, which would put every streamed
        # fragment on its own line; join them inline and skip chunks that
        # carry no text instead of printing "None".
        if piece:
            print(piece, end="", flush=True)
    print()  # Finish the line once the stream is exhausted

# Any failure is reported as a message rather than a traceback.
except Exception as e:
    print(f"An error occurred: {e}")

#### Code generation

Mistral Large, Mistral Nemo and Codestral support code generation with the Chat Completion operations covered above.

With Codestral, you can also do Fill-in-the-middle operations.

##### Fill-in-the-middle (FIM)
With this feature, users can define the starting point of the code using a `prompt`, and the ending point of the code using an optional `suffix` and an optional `stop`.

The Codestral model will then generate the code that fits in between, making it ideal for tasks that require a specific piece of code to be generated.

More information on FIM:
- [Mistral API Documentation FIM](https://docs.mistral.ai/api/#operation/createFIMCompletion)
- [Mistral FIM Documentation](https://docs.mistral.ai/capabilities/code_generation/#fill-in-the-middle-endpoint)

In [None]:
from mistralai_gcp import MistralGoogleCloud

# Client built from the token, region and project set in earlier cells.
client = MistralGoogleCloud(
    access_token=access_token, region=LOCATION, project_id=PROJECT_ID
)

# Fill-in-the-middle is demonstrated with Codestral; these assignments replace
# the model and version chosen earlier and stay in effect for later cells.
MODEL = "codestral"
MODEL_VERSION = "2405"

try:
    # `prompt` is the code before the gap and `suffix` the code after it.
    resp = client.fim.complete(
        model=f"{MODEL}-{MODEL_VERSION}",
        prompt="def count_words_in_file(file_path: str) -> int",
        suffix="return n_words",
    )

    print(resp.choices[0].message.content)

# Any failure is reported as a message rather than a traceback.
except Exception as e:
    print(f"An error occurred: {e}")

## Model Capabilities

### Function Calling with Mistral Large

Function calling allows Mistral models to connect to external tools. By integrating Mistral models with external tools such as user defined functions or APIs, users can easily build applications catering to specific use cases and practical problems.

This section follows Mistral's [function calling guide](https://docs.mistral.ai/capabilities/function_calling/). We write two functions for tracking payment status and payment date. We can use these two tools to provide answers for payment-related queries.

#### Step 1. User: specify tools

Define sample data as if it were stored in a database.

In [None]:
import pandas as pd

# Sample payment records: one list per column, five transactions in total.
data = {
    "transaction_id": [f"T{number}" for number in range(1001, 1006)],
    "customer_id": ["C001", "C002", "C003", "C002", "C001"],
    "payment_amount": [125.50, 89.99, 120.00, 54.30, 210.20],
    "payment_date": [f"2021-10-{day:02d}" for day in (5, 6, 7, 5, 8)],
    "payment_status": ["Paid", "Unpaid", "Paid", "Paid", "Pending"],
}

# Tabular view of the records: one row per transaction, one column per key.
df = pd.DataFrame(data)

Define the functions that will be used as tools.

In [None]:
def retrieve_payment_status(df: pd.DataFrame, transaction_id: str) -> str:
    """Look up the payment status of a transaction.

    Args:
        df: Payments table with `transaction_id` and `payment_status` columns.
        transaction_id: Identifier of the transaction to look up.

    Returns:
        A JSON string: `{"status": <payment_status>}` when the id occurs in
        `df.transaction_id`, otherwise
        `{"error": "transaction id not found."}`.
    """
    matches = df.loc[df.transaction_id == transaction_id, "payment_status"]
    if matches.empty:
        return json.dumps({"error": "transaction id not found."})
    # .item() expects exactly one matching row and raises ValueError if the id
    # appears more than once.
    return json.dumps({"status": matches.item()})


def retrieve_payment_date(df: pd.DataFrame, transaction_id: str) -> str:
    """Look up the payment date of a transaction.

    Args:
        df: Payments table with `transaction_id` and `payment_date` columns.
        transaction_id: Identifier of the transaction to look up.

    Returns:
        A JSON string: `{"date": <payment_date>}` when the id occurs in
        `df.transaction_id`, otherwise
        `{"error": "transaction id not found."}`.
    """
    matches = df.loc[df.transaction_id == transaction_id, "payment_date"]
    if matches.empty:
        return json.dumps({"error": "transaction id not found."})
    # .item() expects exactly one matching row and raises ValueError if the id
    # appears more than once.
    return json.dumps({"date": matches.item()})

Define the tools for those functions following the right JSON format.

In [None]:
def _transaction_tool(name, description):
    """Return a function-tool spec taking one required `transaction_id` string.

    A fresh dictionary is built on every call, so the specs share no state.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    "transaction_id": {
                        "type": "string",
                        "description": "The transaction id.",
                    }
                },
                "required": ["transaction_id"],
            },
        },
    }


# One spec per Python function defined above; the names must match those functions.
tools = [
    _transaction_tool("retrieve_payment_status", "Get payment status of a transaction"),
    _transaction_tool("retrieve_payment_date", "Get payment date of a transaction"),
]

#### Step 2. Model: Generate the right tool and arguments with Mistral Large

In [None]:
import subprocess

# This step targets Mistral Large, as the section title says. MODEL cannot be
# relied on here because the fill-in-the-middle cell sets it to "codestral".
# A dedicated constant also leaves MODEL untouched for later cells.
FUNCTION_CALLING_MODEL = "mistral-large"

# Get a fresh access token and build the headers in this cell, so it does not
# depend on a `headers` variable left over from an earlier cell.
process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

url = f"{ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{FUNCTION_CALLING_MODEL}:rawPredict"
# "tool_choice": "any" asks the model to answer with a tool call.
data = {
    "model": FUNCTION_CALLING_MODEL,
    "messages": [
        {"role": "user", "content": "What is the status of my transaction T1001?"}
    ],
    "tools": tools,
    "tool_choice": "any",
}
# Stay None unless a tool call is extracted successfully below.
function_name = None
function_params = None

# Make the POST request. requests applies no timeout by default, so set one to
# keep the cell from hanging indefinitely if the endpoint never answers.
response = requests.post(url, headers=headers, json=data, timeout=300)

# Check status code and try to parse the response as JSON
if response.status_code == 200:
    try:
        response_dict = response.json()
        tool_call = response_dict["choices"][0]["message"]["tool_calls"][0]
        function_name = tool_call["function"]["name"]
        # The arguments arrive as a JSON-encoded string.
        function_params = json.loads(tool_call["function"]["arguments"])
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        print("Raw response:", response.text)  # Print raw response if parsing fails
    except (KeyError, IndexError, TypeError) as e:
        # Valid JSON without a usable tool call (missing or empty "tool_calls").
        function_name = None
        function_params = None
        print("No tool call found in the response:", repr(e))
        print("Raw response:", response.text)
else:
    print(f"Request failed with status code: {response.status_code}")
    print("Raw response:", response.text)  # The body usually explains the failure

#### Step 3. User: Extract the tool function name, the params and execute the tool function

In [None]:
# Both values stay None (and nothing is printed) when the previous cell could
# not extract a tool call from the response.
if function_name and function_params:
    print("\nfunction_name: ", function_name, "\nfunction_params: ", function_params)

Map function names returned by Mistral model to the actual function object in the environment.

In [None]:
import functools

# Dispatch table from tool name to a callable. `df` is pre-bound with
# functools.partial, so each callable only needs the keyword arguments
# supplied by the model.
names_to_functions = {
    "retrieve_payment_status": functools.partial(retrieve_payment_status, df=df),
    "retrieve_payment_date": functools.partial(retrieve_payment_date, df=df),
}

Call the right function with the parameters suggested by Mistral's model.

In [None]:
# Run the selected tool only when the model returned a function name and arguments.
if function_name and function_params:
    # The model's arguments are passed as keyword arguments to the mapped function.
    function_result = names_to_functions[function_name](**function_params)
    # A bare expression is only rendered when it is the last top-level
    # statement of a cell; nested inside this `if` it would be silently
    # discarded, so print the result explicitly.
    print(function_result)

### JSON Output Mode

You can force the response format to JSON by adding `"response_format": {"type": "json_object"}` to the JSON payload of the request. For details, see:

*   See Mistral's [documentation](https://docs.mistral.ai/capabilities/json_mode/) on JSON mode
*   See Mistral's API [documentation](https://docs.mistral.ai/api/#operation/createChatCompletion)

In [None]:
# Chat request with "response_format" set to "json_object" to ask for a JSON
# answer. MODEL holds whatever the most recently executed cell assigned to it
# (the fill-in-the-middle cell sets it to "codestral").
PAYLOAD = {
    "model": MODEL,
    "messages": [
        {
            "role": "user",
            "content": "What is the best French cheese? Return the product and produce location in JSON format",
        }
    ],
    "response_format": {"type": "json_object"},
}

# Serialize the payload so it can be interpolated into the shell command below.
request = json.dumps(PAYLOAD)
# POST the payload to the unversioned model's rawPredict method.
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}:rawPredict -d '{request}'

Pretty response

In [None]:
# Get the access token
process = subprocess.Popen(
    "gcloud auth print-access-token", stdout=subprocess.PIPE, shell=True
)
(access_token_bytes, err) = process.communicate()
# Without this check a failed gcloud call yields an empty token, and the
# request below fails with an authentication error that hides the real cause.
if process.returncode != 0:
    raise RuntimeError(
        "`gcloud auth print-access-token` failed; authenticate with gcloud first."
    )
access_token = access_token_bytes.decode("utf-8").strip()  # Strip newline

# Request URL (rawPredict on the unversioned model) and a chat body that asks
# for a JSON answer through "response_format".
url = f"{ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/mistralai/models/{MODEL}:rawPredict"
data = {
    "model": MODEL,
    "messages": [
        {
            "role": "user",
            "content": "What is the best French cheese? Return the product and produce location in JSON format",
        }
    ],
    "response_format": {"type": "json_object"},
}
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Make the POST request. requests applies no timeout by default, so set one to
# keep the cell from hanging indefinitely if the endpoint never answers.
response = requests.post(url, headers=headers, json=data, timeout=300)

# Check status code and try to parse the response as JSON
if response.status_code == 200:
    try:
        response_dict = response.json()
        print(response_dict["choices"][0]["message"]["content"])
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        print("Raw response:", response.text)  # Print raw response if parsing fails
    except (KeyError, IndexError, TypeError) as e:
        # Valid JSON that does not have the expected choices/message layout.
        print("Unexpected response structure:", repr(e))
        print("Raw response:", response.text)
else:
    print(f"Request failed with status code: {response.status_code}")
    print("Raw response:", response.text)  # The body usually explains the failure