In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pic2Word Model serving on Vertex AI

## Overview

This notebook demonstrates how to use the [Pic2Word](https://github.com/google-research/composed_image_retrieval) model in Vertex AI Model Garden. 

## Objective

By following this notebook, you will run experiments with the pre-built Docker images, both locally (optional) and on Vertex AI.

- Run local inferences for pretrained Pic2Word models

- Deploy pretrained Pic2Word models in Google Cloud Vertex AI

## Dataset

We use the [COCO](https://cocodataset.org/#home) validation set (5,000 images) for evaluation.

## Costs

This tutorial uses billable components of Google Cloud:

- Vertex AI
- Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing?_ga=2.46650789.-341051769.1686949237) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing?_ga=2.46650789.-341051769.1686949237), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/?_ga=2.247379078.-341051769.1686949237) to generate a cost estimate based on your projected usage.

## Setup environment

### Setup cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable Artifact Registry](https://cloud.google.com/artifact-registry/docs/enable-service) and [create a repository](https://cloud.google.com/artifact-registry/docs/repositories/create-repos) for storing docker images.

1. [Create a GCS bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs.

1. [Enable the Vertex AI API and Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component).

### Install dependencies

In [None]:
!pip install -r composed_image_retrieval/requirements.txt

It's highly recommended to run this notebook on [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).

If you are running this notebook locally, you will need to install the [Cloud SDK](https://cloud.google.com/sdk) and [gsutil](https://cloud.google.com/storage/docs/gsutil_install).

### Setup variables

In [None]:
# --- Google Cloud project settings -------------------------------------
PROJECT_ID = "automl-migration-test"
REGION = "us-central1"
GCS_BUCKET = "pic2word-bucket"

# --- Serving container --------------------------------------------------
# The serving port, followed by the URI of the pre-built docker image.
SERVE_PORT = 7080
SERVE_DOCKER_URI = (
    "us-central1-docker.pkg.dev/automl-migration-test/"
    "pic2word-repo/pic2word_serve:latest"
)

# --- Model --------------------------------------------------------------
# Model name and the path to the model checkpoint file.
MODEL_NAME = "pic2word"
MODEL_PT_PATH = "gs://pic2word-bucket/checkpoint/pic2word_model.pt"

## [Optional] Run serving jobs locally

### Build and push serving docker image

NOTE: Users do not need to build Docker images themselves; they can use our pre-built Docker images directly.

In [None]:
!docker build -f serve.Dockerfile . -t {SERVE_DOCKER_URI}
!docker push {SERVE_DOCKER_URI}

In [None]:
# Prediction URL used to reach the serving container on this machine.
LOCAL_SERVE_URL = "http://localhost:" + str(SERVE_PORT) + "/predictions/pic2word_serving"

### Image retrieval

In [None]:
# Run the serving container. 
!nvidia-docker run -t --rm \
-p 7080:7080 \
-e NVIDIA_DISABLE_REQUIRE=1 \
-e CUDA_VISIBLE_DEVICES=0 \
{SERVE_DOCKER_URI}

In [None]:
import json

# `requests` was used below without being imported, which raised a NameError
# on a fresh kernel.
import requests

# The request body is a JSON object holding a text `query` and an `image_path`.
payload = json.dumps(
    {"query": "a bunch of *", "image_path": "pic2word-bucket/model_io/input/"}
)

# POST the request to the serving container that is running locally.
response = requests.post(
    LOCAL_SERVE_URL,
    data=payload,
    headers={"content-type": "application/json", "Accept-Charset": "UTF-8"},
)
print(response)

## Deploy model for online prediction

This section uploads the model to Vertex Model Registry and deploys it on an Endpoint resource. 

### Upload and deploy model to Vertex AI

In [None]:
from google.cloud import aiplatform

# Init common setup.
# Project, region and bucket come from the "Setup variables" cell so that they
# only have to be edited in one place. The staging bucket is passed as a
# gs:// URI, which is the form the SDK documents for this argument.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f"gs://{GCS_BUCKET}",
)


# Upload model.
# The serving image and port are the ones configured in the setup cell
# (SERVE_DOCKER_URI / SERVE_PORT) instead of a separately hard-coded image.
serving_env = {}
model = aiplatform.Model.upload(
    display_name="pic2word-model-display",
    serving_container_image_uri=SERVE_DOCKER_URI,
    serving_container_ports=[SERVE_PORT],
    # NOTE(review): LOCAL_SERVE_URL uses "/predictions/pic2word_serving" while
    # this route is "/predictions/pic2word" - confirm which one the container
    # actually exposes.
    serving_container_predict_route="/predictions/pic2word",
    serving_container_health_route="/ping",
    serving_container_environment_variables=serving_env,
)
# Or reuse a pre-uploaded model.
# model = aiplatform.Model('projects/123456789/locations/us-central1/models/123456789@1')

# Create an endpoint.
endpoint = aiplatform.Endpoint.create(display_name="pytorch-pic2word-endpoint")
# Or reuse a pre-created endpoint.
# endpoint = aiplatform.Endpoint('projects/123456789/locations/us-central1/endpoints/123456789')

# Deploy model to endpoint on a single n1-standard-8 machine with one T4 GPU,
# routing all of the endpoint's traffic to this deployment.
model.deploy(
    endpoint=endpoint,
    machine_type="n1-standard-8",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
    traffic_percentage=100,
)

You can manage your uploaded models in the [Model Registry](https://console.cloud.google.com/vertex-ai/models) and your endpoints on the [Endpoints](https://console.cloud.google.com/vertex-ai/endpoints) page.

## Send a prediction request to the endpoint

In [None]:
# By default this cell queries `endpoint`, the Endpoint created in the
# "Upload and deploy model to Vertex AI" cell.
# Or send the request to an endpoint that was deployed earlier:
# endpoint = aiplatform.Endpoint('projects/123456789/locations/us-central1/endpoints/123456789')

# Endpoint.predict expects a list of instances, not a JSON-encoded string, so
# the request is passed as a one-element list containing the request dict.
instances = [
    {"query": "a bunch of *", "image_path": "pic2word-bucket/model_io/input/"}
]
response = endpoint.predict(instances=instances).predictions
print(response)