In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - TimesFM 1.0 (CPU/GPU Deployment)

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_timesfm_deployment_on_vertex.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_timesfm_deployment_on_vertex.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates deploying TimesFM 1.0 to a Vertex AI Endpoint and making online predictions for time series forecasting.


### Objective

- Deploy TimesFM 1.0 to a Vertex AI Endpoint.
- Make predictions to the endpoint for time series forecasting.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Setup Google Cloud project

In [None]:
# @markdown ### **Prerequisites**
# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).
# @markdown 2. [Optional] [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets)
# @markdown for storing experiment outputs. Set the BUCKET_URI for the
# @markdown experiment environment. The specified Cloud Storage bucket
# @markdown (`BUCKET_URI`) should be located in the same region as where the
# @markdown notebook was launched. Note that a multi-region bucket (eg. "us") is
# @markdown not considered a match for a single region covered by the
# @markdown multi-region range (eg. "us-central1"). If not set, a unique GCS
# @markdown bucket will be created instead.

import json
import os
# Import the necessary packages
from datetime import datetime
from typing import Tuple

from google.cloud import aiplatform

# Get the default cloud project id.
# Read straight from the environment: a KeyError is raised here if
# GOOGLE_CLOUD_PROJECT is not set.
# NOTE(review): presumably exported by the managed notebook runtime — confirm.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Same as above: a KeyError is raised if GOOGLE_CLOUD_REGION is not set.
REGION = os.environ["GOOGLE_CLOUD_REGION"]

print(f"Using this region: {REGION}")

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# @markdown Cloud Storage bucket for storing the experiment artifacts.
# @markdown A unique GCS bucket will be created for the purpose of this
# @markdown notebook. If you prefer using your own GCS bucket, change the value
# @markdown yourself below.
now = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_URI = "gs://"  # @param {type:"string"}
assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."

# Create a unique GCS bucket for this notebook, if not specified by the user
if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}"
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            f"Bucket region {bucket_region} is different from notebook region"
            f" {REGION}"
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "timesfm")

# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Set up default SERVICE_ACCOUNT
SERVICE_ACCOUNT = None
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)

# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME


# @markdown ### **Choose a prebuilt checkpoint**
# @markdown Here we specify where to get the model checkpoint. TimesFM
# @markdown pretrained checkpoints are by default saved under
# @markdown `gs://vertex-model-garden-public-{region}/timesfm` and indexed by
# @markdown the checkpoint version.

# Source location of the public checkpoints and the checkpoint version to copy.
VERTEX_AI_MODEL_GARDEN_TIMESFM = "gs://vertex-model-garden-public-us/timesfm"  # @param {type:"string", isTemplate:true} ["gs://vertex-model-garden-public-us/timesfm", "gs://vertex-model-garden-public-eu/timesfm", "gs://vertex-model-garden-public-asia/timesfm"]
MODEL_VARIANT = "timesfm-1.0-200m"  # @param ["timesfm-1.0-200m"]

print(
    "Copying TimesFM model artifacts from",
    f"{VERTEX_AI_MODEL_GARDEN_TIMESFM}/{MODEL_VARIANT}",
    "to",
    MODEL_BUCKET,
)

# Copy the selected checkpoint into this notebook's own bucket.
# NOTE(review): both `-r` and `-R` are passed; they look like synonyms for a
# recursive copy, so one of them is probably redundant — confirm.
! gsutil -m cp -r -R $VERTEX_AI_MODEL_GARDEN_TIMESFM/$MODEL_VARIANT $MODEL_BUCKET

# Default checkpoint location used by the deployment cell when no custom
# checkpoint is given.
model_path_prefix = MODEL_BUCKET

# The pre-built serving docker images.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/jax-timesfm-serve:20240528_1310_RC00"


def get_job_name_with_datetime(prefix: str) -> str:
    """Builds a job name by appending the current date and time to `prefix`.

    Used when triggering training or deployment jobs in Vertex AI.

    Args:
      prefix: The leading part of the job name.

    Returns:
      `prefix` followed by `_YYYYMMDD_HHMMSS` for the current local time.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return "_".join([prefix, timestamp])


def deploy_model(
    model_name: str,
    checkpoint_path: str,
    horizon: int,
    machine_type: str = "g2-standard-4",
    accelerator_type: str = "NVIDIA_L4",
    accelerator_count: int = 1,
    deploy_source: str = "notebook",
) -> Tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Create a Vertex AI Endpoint and deploy the specified model to the endpoint.

    Args:
      model_name: Base display name; a timestamp suffix is appended to it.
      checkpoint_path: Location of the model artifacts, passed as the uploaded
        model's `artifact_uri`.
      horizon: Forecast horizon, forwarded to the serving container as the
        `TIMESFM_HORIZON` environment variable (converted with `str`).
      machine_type: Machine type to deploy on.
      accelerator_type: Accelerator to attach. `None` or
        `"ACCELERATOR_TYPE_UNSPECIFIED"` selects a CPU-only deployment, a
        value starting with `"NVIDIA"` selects the GPU backend, and anything
        else selects the TPU backend.
      accelerator_count: Number of accelerators to attach.
      deploy_source: Forwarded to the serving container as `DEPLOY_SOURCE`.

    Returns:
      A `(model, endpoint)` tuple with the uploaded model and the endpoint it
      was deployed to.
    """
    # Resolve the backend before creating any cloud resource so that an
    # invalid argument cannot leave an unused endpoint behind.
    if accelerator_type is None or accelerator_type == "ACCELERATOR_TYPE_UNSPECIFIED":
        timesfm_backend = "cpu"
        accelerator_type = None
    elif accelerator_type.startswith("NVIDIA"):
        timesfm_backend = "gpu"
    else:
        timesfm_backend = "tpu"

    model_name_with_time = get_job_name_with_datetime(model_name)
    endpoint = aiplatform.Endpoint.create(
        display_name=f"{model_name_with_time}-endpoint",
        credentials=aiplatform.initializer.global_config.credentials,
    )

    model = aiplatform.Model.upload(
        display_name=model_name_with_time,
        artifact_uri=checkpoint_path,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[8080],
        serving_container_predict_route="/predict",
        serving_container_health_route="/health",
        serving_container_environment_variables={
            "DEPLOY_SOURCE": deploy_source,
            "TIMESFM_HORIZON": str(horizon),
            "TIMESFM_BACKEND": timesfm_backend,
        },
        credentials=aiplatform.initializer.global_config.credentials,
    )
    # Describe what is actually being deployed: CPU-only deployments have no
    # accelerator, and a non-NVIDIA accelerator is not a GPU.
    if accelerator_type is None:
        print(f"Deploying {model_name_with_time} on {machine_type} (CPU only).")
    else:
        print(
            f"Deploying {model_name_with_time} on {machine_type} with"
            f" {accelerator_count} {accelerator_type} accelerator(s)."
        )
    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=SERVICE_ACCOUNT,
        enable_access_logging=True,
        min_replica_count=1,
        sync=True,
    )
    return model, endpoint


def get_quota(project_id: str, region: str, resource_id: str) -> int:
    """Returns the quota for a resource in a region.

    Returns -1 if can not figure out the quota.
    """
    service_endpoint = "aiplatform.googleapis.com"
    quota_list_output = !gcloud alpha services quota list --service=$service_endpoint  --consumer=projects/$project_id --filter="$service_endpoint/$resource_id" --format=json
    # Use '.s' on the command output because it is an SList type.
    quota_data = json.loads(quota_list_output.s)
    if len(quota_data) == 0 or "consumerQuotaLimits" not in quota_data[0]:
        return -1
    if (
        len(quota_data[0]["consumerQuotaLimits"]) == 0
        or "quotaBuckets" not in quota_data[0]["consumerQuotaLimits"][0]
    ):
        return -1
    all_regions_data = quota_data[0]["consumerQuotaLimits"][0]["quotaBuckets"]
    for region_data in all_regions_data:
        if (
            region_data.get("dimensions")
            and region_data["dimensions"]["region"] == region
        ):
            if "effectiveLimit" in region_data:
                return int(region_data["effectiveLimit"])
            else:
                return 0
    return -1


def get_resource_id(accelerator_type: str, is_for_training: bool) -> str:
    """Maps an accelerator type and use case to its quota resource id.

    Args:
      accelerator_type: The accelerator type.
      is_for_training: True to look up the training resource, False to look up
        the serving resource.

    Returns:
      The resource id.

    Raises:
      ValueError: If `accelerator_type` is not known for the chosen use case.
    """
    use_case = "training" if is_for_training else "serving"
    resource_ids_by_use_case = {
        "training": {
            "NVIDIA_TESLA_V100": "custom_model_training_nvidia_v100_gpus",
            "NVIDIA_L4": "custom_model_training_nvidia_l4_gpus",
            "NVIDIA_TESLA_A100": "custom_model_training_nvidia_a100_gpus",
            "ACCELERATOR_TYPE_UNSPECIFIED": "custom_model_training_cpus",
        },
        "serving": {
            "NVIDIA_TESLA_V100": "custom_model_serving_nvidia_v100_gpus",
            "NVIDIA_L4": "custom_model_serving_nvidia_l4_gpus",
            "NVIDIA_TESLA_A100": "custom_model_serving_nvidia_a100_gpus",
            "ACCELERATOR_TYPE_UNSPECIFIED": "custom_model_serving_cpus",
        },
    }
    resource_id = resource_ids_by_use_case[use_case].get(accelerator_type)
    if resource_id is None:
        raise ValueError(
            f"Could not find accelerator type: {accelerator_type} for {use_case}."
        )
    return resource_id


def check_quota(
    project_id: str,
    region: str,
    accelerator_type: str,
    accelerator_count: int,
    is_for_training: bool,
):
    """Checks if the project and the region has the required quota.

    Args:
      project_id: The project whose quota is checked.
      region: The region in which the accelerators are needed.
      accelerator_type: The accelerator type.
      accelerator_count: The number of accelerators required.
      is_for_training: Whether the quota is for training (True) or serving
        (False).

    Raises:
      ValueError: If the quota cannot be found, or is smaller than
        `accelerator_count`.
    """
    resource_id = get_resource_id(accelerator_type, is_for_training)
    available = get_quota(project_id, region, resource_id)
    how_to_fix = (
        "Either use a different region or request additional quota. "
        "Follow instructions here "
        "https://cloud.google.com/docs/quotas/view-manage#requesting_higher_quota "
        "to check quota in a region or request additional quota for your project."
    )
    # -1 is the "unknown" sentinel returned by get_quota; test it first so it
    # is not reported as an insufficient quota.
    if available == -1:
        problem = f"""Quota not found for: {resource_id} in {region}.
            {how_to_fix}"""
    elif available < accelerator_count:
        problem = f"""Quota not enough for {resource_id} in {region}:
            {available} < {accelerator_count}.
            {how_to_fix}"""
    else:
        return
    raise ValueError(problem)

## Deploy TimesFM to a Vertex AI Endpoint

In [None]:
# @markdown This section uploads the prebuilt TimesFM model to Model Registry
# @markdown and deploys it to a Vertex AI Endpoint.
# @markdown It takes **approximately 20 minutes** to deploy.

# @markdown ### **Step 1: Set the checkpoint path**
# @markdown Leave this blank to load the checkpoint we copied over earlier.
# @markdown If you've brought your own checkpoint, specify its path here.
# @markdown
# @markdown **Note**: Most of the time you should leave it blank (as is)
# @markdown when you've chosen to use a prebuilt checkpoint.
# @markdown

custom_timesfm_model_uri = "gs://"  # @param {type: "string"}

# An empty value or the bare "gs://" placeholder means no custom checkpoint was
# given, in which case the checkpoint copied earlier is used.
use_custom_checkpoint = (
    bool(custom_timesfm_model_uri) and custom_timesfm_model_uri != "gs://"
)
if use_custom_checkpoint:
    print("Deploying custom TimesFM model.")
    checkpoint_path = custom_timesfm_model_uri
else:
    print("Deploying prebuilt TimesFM model. ")
    checkpoint_path = model_path_prefix
print(f"Loading checkpoint from {checkpoint_path}.")

# @markdown ### **Step 2: Choose the accelerator**
# @markdown Select the accelerator type to use to deploy the model.
# @markdown
# @markdown **Note**: Most of the time you can go with CPU only. TimesFM is
# @markdown fast even with the CPU backend. Consider a GPU only if you need a
# @markdown dedicated endpoint to handle a large number of queries per second.
# @markdown
# @markdown **Note**: After deployment, please take a look at the log to get
# @markdown the model / endpoint that you can use in another session.
# @markdown

accelerator_type = "NVIDIA_L4"  # @param ["CPU", "NVIDIA_L4"]
# Map the form choice to the recommended machine settings.
if accelerator_type == "NVIDIA_L4":
    machine_type = "g2-standard-4"
    accelerator_count = 1
elif accelerator_type == "CPU":
    # CPU-only deployment: no accelerator is attached.
    accelerator_type = "ACCELERATOR_TYPE_UNSPECIFIED"
    machine_type = "n1-standard-8"
    accelerator_count = 0
else:
    raise ValueError(
        f"Recommended machine settings not found for: {accelerator_type}. To use"
        " another accelerator, edit this code block to pass in an"
        " appropriate `machine_type`, `accelerator_type`, and"
        " `accelerator_count` to the deploy_model function by clicking `Show"
        " Code` and then modifying the code."
    )

# Only accelerator quota is verified; report success only when the check ran.
if accelerator_type != "ACCELERATOR_TYPE_UNSPECIFIED":
    check_quota(
        project_id=PROJECT_ID,
        region=REGION,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        is_for_training=False,
    )
    print("Quota is OK.")
else:
    print("Skipping the accelerator quota check for a CPU-only deployment.")
# @markdown If you want to use other accelerator types not listed above, please
# @markdown check other Vertex AI prediction supported accelerators and regions
# @markdown at https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.
# @markdown You may need to manually set the `machine_type`, `accelerator_type`,
# @markdown and `accelerator_count` in the code by clicking `Show code` first.

# @markdown ### **Step 3: Set the forecast horizon**
# @markdown We need to specify the forecast horizon TimesFM will be queried on
# @markdown to compile its computation. The endpoint will always predict this
# @markdown number of time points in the future, possibly after being rounded
# @markdown up to the closest multiple of the model output patch length.
# @markdown Make sure to set it to the potential maximum for your use case.
horizon = 256  # @param {type:"number"}
print("Creating endpoint.")
# Upload the checkpoint and deploy it with the machine settings chosen above.
# `model` and `endpoint` are kept for the query and clean-up cells below.
model, endpoint = deploy_model(
    model_name=f"timesfm-{MODEL_VARIANT}",
    checkpoint_path=checkpoint_path,
    horizon=horizon,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
)

## Query TimesFM

An endpoint prediction request looks like
```python
endpoint.predict(instances=[{"input": [...], "freq": 0}, ...])
```

Now we can query the endpoint to forecast on input time series. Let's first start with some sanity checks.


In [None]:
# @title Create a helper function to visualize forecasts.
import matplotlib.pyplot as plt
import numpy as np


def visualize_forecast(
    context: list[float],
    horizon_mean: list[float],
    ground_truth: list[float] | None = None,
    horizon_lower: list[float] | None = None,
    horizon_upper: list[float] | None = None,
    ylabel: str | None = None,
    title: str | None = None,
):
    """Plots a context series followed by its forecast on a shared time axis.

    Every series is padded with NaN over the part of the axis it does not
    cover, so the context and the horizon line up on the same x positions.

    Args:
      context: The observed values fed to the model.
      horizon_mean: The point forecast, drawn right after the context.
      ground_truth: Optional actual values for the horizon.
      horizon_lower: Optional lower band of the forecast. The band is only
        drawn when both `horizon_lower` and `horizon_upper` are given.
      horizon_upper: Optional upper band of the forecast.
      ylabel: Optional label of the y axis.
      title: Optional title of the figure.
    """
    context_gap = [np.nan] * len(context)
    horizon_gap = [np.nan] * len(horizon_mean)
    time_axis = list(range(len(context) + len(horizon_mean)))

    plt.figure(figsize=(5, 3))
    plt.plot(time_axis, context + horizon_gap, color="tab:cyan", label="context")
    plt.plot(time_axis, context_gap + horizon_mean, color="tab:red", label="forecast")

    if ground_truth:
        # The ground truth gets its own x range since its length may differ
        # from that of the forecast.
        truth_axis = list(range(len(context) + len(ground_truth)))
        plt.plot(
            truth_axis,
            context_gap + ground_truth,
            color="tab:purple",
            label="ground truth",
        )

    if horizon_upper and horizon_lower:
        band_edges = (
            (horizon_upper, "--", "forecast, upper"),
            (horizon_lower, ":", "forecast, lower"),
        )
        for edge_values, edge_style, edge_label in band_edges:
            plt.plot(
                time_axis,
                context_gap + edge_values,
                color="tab:orange",
                linestyle=edge_style,
                label=edge_label,
            )
        plt.fill_between(
            time_axis,
            context_gap + horizon_upper,
            context_gap + horizon_lower,
            color="tab:orange",
            alpha=0.2,
        )

    if ylabel:
        plt.ylabel(ylabel)
    if title:
        plt.title(title)
    plt.xlabel("time")
    plt.legend()
    plt.show()

### Sanity checks


In [None]:
# @markdown We first check TimesFM on some sinusoidals. Pay attention to how
# @markdown we are calling the endpoints.
# Prepare the context. Notice each of them has a different context length.
sinusoids = [
    np.sin(np.linspace(0, 20, 100)),
    np.sin(np.linspace(0, 40, 500)),
    np.sin(np.linspace(0, 50, 300)) + np.sin(np.linspace(1, 71, 300)) * 0.5,
]

# Note: this is strictly how the query should be structured: one dict per time
# series, holding the values under "input" and the frequency under "freq".
instances = [{"input": wave.tolist(), "freq": 0} for wave in sinusoids]

# Query the endpoint.
results = endpoint.predict(instances=instances)

In [None]:
# @markdown Now we visualize the response. Make sure the model makes legit
# @markdown forecasts on those curves, and we move on to real world data.

# There's bunch of important stuff in the results. Here we focus on results[0]:
# This is the TimesFM response.
print(results[0][0].keys())

# One figure per sinusoid, pairing each input with the forecast at the same
# position in the response.
for series_index in range(3):
    visualize_forecast(
        instances[series_index]["input"],
        results[0][series_index]["point_forecast"],
        title=f"Sinusoidal {series_index + 1}",
    )

### Point forecast

Let's use a real world dataset from Kaggle on the [daily temperatures in Delhi, India](https://www.kaggle.com/datasets/sumanthvrao/daily-climate-time-series-data/data). Make sure you've set the Kaggle credentials following [these instructions](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md#api-credentials).


In [None]:
# Install the dependencies.
! pip install kaggle

In [None]:
# Download and prepare the dataset
! kaggle datasets download sumanthvrao/daily-climate-time-series-data
! unzip /content/daily-climate-time-series-data.zip

In [None]:
import pandas as pd

# Read the training split extracted by the previous cell. The path is relative
# to the working directory, which is where the archive is extracted.
data = pd.read_csv("DailyDelhiClimateTrain.csv")
data

In [None]:
# We manually prepare 3 forecast tasks, each described as
# (first context day, first forecast day, day after the last forecast day):
# 1. Use day 0 - 199 to forecast day 200 - 299.
# 2. Use day 300 - 599 to forecast day 600 - 699.
# 3. Use day 700 - 1199 to forecast day 1200 - 1299.
task_windows = [(0, 200, 300), (300, 600, 700), (700, 1200, 1300)]

temperature = data.meantemp.to_list()
inputs = [temperature[start:split] for start, split, _ in task_windows]
ground_truths = [temperature[split:stop] for _, split, stop in task_windows]

# One request carrying all three contexts.
response = endpoint.predict(
    instances=[{"input": each_input, "freq": 0} for each_input in inputs]
)
response[0][0].keys()

The `response` is structured as follows:
* `response[0][i]` is the forecast result of the ith input inside `instances`.
* `response[0][i]` has three keys:
 - `point_forecast`: the mean point forecast
 - `quantiles`: the schema of the quantile outputs
 - `quantile_forecast`: for each time stamp in the horizon this will be a list whose elements are the corresponding quantiles as denoted in the `quantiles` schema.
   

In [None]:
# Visualize the response
for task_i in range(len(inputs)):
    visualize_forecast(
        inputs[task_i],
        # Keep the first 100 forecast points to match the 100-day ground truth.
        response[0][task_i]["point_forecast"][:100],
        # Each task is compared against its own ground truth.
        ground_truth=ground_truths[task_i],
        title=f"Daily temperature in Delhi, India, Task {task_i+1}",
        ylabel="Temperature (°C)",
    )

### Anomaly detection

As of checkpoint TimesFM-1.0-200m, TimesFM is capable of outputting quantile forecasts as well. These are uncalibrated forecasts and are experimental. But please feel free to play with them to see what you can do with them.

Here we show how these outputs can potentially serve as anomaly detectors, when we define the anomaly as something beyond a certain range of TimesFM forecasts. In this example we are drawing bands defined by the 30th and the 70th percentiles on the same tasks we did in the last section. Anything outside of the bands could be an "anomaly".

In [None]:
# Visualize the response
for task_i in range(len(inputs)):
    quantile_forecast = response[0][task_i]["quantile_forecast"]
    visualize_forecast(
        inputs[task_i],
        # Keep the first 100 forecast points to match the 100-day ground truth.
        response[0][task_i]["point_forecast"][:100],
        # Each task is compared against its own ground truth.
        ground_truth=ground_truths[task_i],
        # NOTE(review): positions 3 and 7 are presumably the 30th and 70th
        # percentiles — confirm against the `quantiles` key of the response.
        horizon_lower=[x[3] for x in quantile_forecast][:100],
        horizon_upper=[x[7] for x in quantile_forecast][:100],
        title=f"Daily temperature in Delhi, India, Task {task_i+1}",
        ylabel="Temperature (°C)",
    )

## Clean up resources

In [None]:
# @title Releasing endpoint and model
# @markdown Delete the experiment models and endpoints to recycle the resources
# @markdown and avoid unnecessary continuous charges that may be incurred.

# Undeploy model and delete endpoint.
# NOTE(review): `force=True` presumably undeploys the models on the endpoint
# before deleting it — confirm against the SDK documentation.
endpoint.delete(force=True)

# Delete models.
if model:
    model.delete()

# The bucket is kept by default; tick the box to remove it and everything in it.
delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI