In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Online Prediction with a PSC-based private endpoint

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fprediction%2Fget_started_with_psc_private_endpoint.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

Compared to the current PSA (private services access) Private Endpoint, the PSC (Private Service Connect) based Private Endpoint has the following benefits:
1. Simpler setup process: Currently, the only extra step users need to take is to create an endpoint in their VPC. This will be done by PSC automatically before our GA launch.

2. No more IP exhaustion issues: The GKE cluster is hosted in the tenant project's VPC, so we can create a much bigger cluster that isn't affected by IP exhaustion in the user's VPC.

3. Unified experience with public endpoints: The API is the same as for public endpoints, so users can use our SDK/client libraries. We also provide quota, IAM, and monitoring metrics, as public endpoints do.


## Get started

### Install Vertex AI SDK for Python and other required packages

In [None]:
! pip3 install --upgrade --user --quiet google-cloud-aiplatform

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# The restart is only triggered on Google Colab, detected by the google.colab
# module being loaded in this process.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    import IPython

    # Shut the kernel down with the restart flag set.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# The interactive Colab login is only attempted when the google.colab module
# is loaded, i.e. when the notebook runs on Google Colab.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK for Python

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Project and region shared by the cells below (bucket creation, SDK
# initialization, endpoint creation and the gcloud commands).
# Replace the placeholder project ID with your own before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In [None]:
# Create GCS Bucket
# The bucket is created in LOCATION under PROJECT_ID. Later cells use it as the
# SDK staging bucket and as the destination for the sample models.
# Replace the placeholder with a bucket name of your own.
BUCKET_URI = "gs://your-bucket-name-unique"  # @param {type:"string"}
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

In [None]:
from google.cloud import aiplatform

# Set the project, region and staging bucket that subsequent SDK calls use by
# default.
aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

## Prepare Test Models

We prepared some test models; feel free to use your own models instead.

In [None]:
# Copy Models to the Bucket
# Recursively copies everything under the public sample path into BUCKET_URI.
# The wildcard is quoted so that it is passed to gsutil unchanged.
# NOTE(review): presumably this also provides the files later read from
# {BUCKET_URI}/requests - confirm against the sample bucket layout.
! gsutil cp -r "gs://cloud-samples-data/vertex-ai/prediction/test-models-requests/*" {BUCKET_URI}

### Upload Model

In [None]:
# Choose the sample model to use: keep exactly one of the sections below active
# (uncommented) and run the cell. Every section sets the same four variables:
#   DISPLAY_NAME - display name given to the uploaded model
#   ARTIFACT_URI - folder inside BUCKET_URI that holds the model artifacts
#   IMAGE_URI    - serving container image used for the model
#   REQUEST_FILE - local JSON file with the sample prediction request

# TF Model
DISPLAY_NAME = "tensorflow model"  # @param {type:"string"}
ARTIFACT_URI = BUCKET_URI + "/tensorflow"
IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-12:latest"
REQUEST_FILE = "tensorflow_request.json"


# Pytorch Model
# DISPLAY_NAME="Pytorch model"
# ARTIFACT_URI=BUCKET_URI+"/pytorch"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/pytorch-cpu.2-0:latest"
# REQUEST_FILE="pytorch_request.json"


# Sklearn Model
# DISPLAY_NAME="Sklearn model"
# ARTIFACT_URI=BUCKET_URI+"/sklearn"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-2:latest"
# REQUEST_FILE="sklearn_request.json"


# xgboost Model
# DISPLAY_NAME="xgboost model"
# ARTIFACT_URI=BUCKET_URI+"/xgboost"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-7:latest"
# REQUEST_FILE="xgboost_request.json"

In [None]:
# Upload the selected model artifacts to Vertex AI, to be served with the
# chosen container image. With sync=False the call is made asynchronously.
model = aiplatform.Model.upload(
    display_name=DISPLAY_NAME,
    artifact_uri=ARTIFACT_URI,
    serving_container_image_uri=IMAGE_URI,
    sync=False,
)

# Block until the asynchronous upload has completed before the model is used.
model.wait()

### Create PSC based Prediction Private Endpoint


In [None]:
# Create a private endpoint configured for Private Service Connect.
# `project_allowlist` names the projects permitted to connect to the endpoint;
# here only the current project is listed.
psc_endpoint = aiplatform.PrivateEndpoint.create(
    display_name="psc-endpoint",
    project=PROJECT_ID,
    location=LOCATION,
    private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
        project_allowlist=[PROJECT_ID],
    ),
)

Alternatively, send an HTTP request to create the endpoint. You need to manually replace ALL the variables below.

In [None]:
# REST alternative to the SDK call above. The command is left commented out:
# before running it, replace every {LOCATION} and {PROJECT_ID} placeholder by
# hand with your own values.
# ! curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`" https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints -d \
# '{ \
#     "displayName": "psc-endpoint", \
#     "privateServiceConnectConfig": { \
#       "enablePrivateServiceConnect": true, \
#       "projectAllowlist": ["{PROJECT_ID}"] \
#     } \
# }'

### Deploy Model

In [None]:
# Deploy the uploaded model to the PSC endpoint and route all traffic to it.
psc_endpoint.deploy(model=model, traffic_percentage=100, machine_type="e2-standard-8")

# Show the models currently deployed on the endpoint.
psc_endpoint.list_models()

### Create Forwarding Rule in Consumer Project

First, find the service attachment from the endpoint and deployed model.

In [None]:
# Read the service attachment from the first deployed model on the endpoint;
# the forwarding rule created in the next cell targets it.
service_attachment = psc_endpoint.list_models()[0].private_endpoints.service_attachment
print(service_attachment)

Then, create an address and a forwarding rule that targets the service attachment. This example uses the default network and subnet; replace them with your own VPC network and subnet if you are running in your VPC.

In [None]:
# Reserve an address named psc-prediction in the `default` subnet of the
# chosen region.
! gcloud compute addresses create psc-prediction \
    --region={LOCATION} \
    --subnet=default

# Create the forwarding rule op-psc-endpoint, which uses the reserved address
# and targets the endpoint's service attachment.
! gcloud compute forwarding-rules create op-psc-endpoint \
    --network=default \
    --address=psc-prediction \
    --target-service-attachment={service_attachment} \
    --region={LOCATION}

Save the IP address above.

In [None]:
IP_ADDRESS = ! gcloud compute forwarding-rules describe op-psc-endpoint --region={LOCATION} --format='value(IPAddress)'
IP_ADDRESS = IP_ADDRESS[0]
print(IP_ADDRESS)

## Make Predictions

From this point on, all the code below must be run from a Google Cloud VM in the same VPC and the same region as your PSC endpoint.

If you're using Vertex AI Workbench or Colab Enterprise, you should be good.

If you're creating a Compute Engine (GCE) VM, make sure the Cloud Platform access scope is enabled.

In [None]:
# Download the requests files:
! gsutil cp {BUCKET_URI}/requests/* ./

In [None]:
import os

# The live prediction is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import json

    import urllib3

    # Silence urllib3's InsecureRequestWarning for the requests sent below.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Read the sample request; the file is closed before the endpoint is called.
    with open(REQUEST_FILE) as request_fp:
        data = json.load(request_fp)

    # Send the instances to the endpoint through the forwarding rule's IP address.
    response = psc_endpoint.predict(
        instances=data["instances"], endpoint_override=IP_ADDRESS
    )
    print(response)

### Predict Requests

Alternatively, you can send HTTP requests directly to the IP address. Make sure to replace all variables in the requests.

In [None]:
# Resource name of the endpoint, used to build the REST URLs in the next cell.
ENDPOINT_RESOURCE_NAME = psc_endpoint.resource_name

In [None]:
import os

# The live request is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    # Predict
    # Posts the contents of REQUEST_FILE to the endpoint's :predict method via
    # the forwarding rule's IP address. --insecure makes curl skip TLS
    # certificate verification.
    ! curl --insecure -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`"  https://{IP_ADDRESS}/v1/{ENDPOINT_RESOURCE_NAME}:predict -d@{REQUEST_FILE}

    # # RawPredict
    # ! curl -v --insecure -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`" https://{IP_ADDRESS}/v1/{ENDPOINT_RESOURCE_NAME}:rawPredict -d@{REQUEST_FILE}

### Deploy another model and update traffic split

Deploy another model and update the traffic split to 50:50. After the deployment is done, rerun the prediction multiple times; you should see that the `deployed_model_id` values in the responses differ.

In [None]:
# Deploy the same model a second time and assign 50% of the traffic to the
# new deployment.
psc_endpoint.deploy(model=model, traffic_percentage=50, machine_type="e2-standard-8")

In [None]:
import os

# The live requests are skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import json

    import urllib3

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Maps each deployed model ID to the number of responses it served.
    counter = {}
    with open(REQUEST_FILE) as request_fp:
        data = json.load(request_fp)

    # Send the same request 1000 times and tally which deployment answered.
    for attempt in range(1000):
        response = psc_endpoint.predict(
            instances=data["instances"], endpoint_override=IP_ADDRESS
        )
        served_by = response.deployed_model_id
        counter[served_by] = counter.get(served_by, 0) + 1
    print(counter)

You can update the traffic split with the following command and run the code above again.

In [None]:
import os

# The update is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    # Deployed model IDs in the order they were first recorded in `counter`.
    seen_model_ids = list(counter)
    deployed_model_id_0 = seen_model_ids[0]
    deployed_model_id_1 = seen_model_ids[1]

    # Shift the split to 20% / 80% between the two deployments.
    new_traffic_split = {deployed_model_id_0: 20, deployed_model_id_1: 80}
    psc_endpoint.update(traffic_split=new_traffic_split)

## Cleanup

In [None]:
# Undeploy every model from the endpoint first, then delete the endpoint and
# finally the uploaded model.
psc_endpoint.undeploy_all()
psc_endpoint.delete()
model.delete()

In [None]:
# Delete the forwarding rule before the address it uses; --quiet skips the
# interactive confirmation prompts.
! gcloud compute forwarding-rules delete op-psc-endpoint --region={LOCATION}  --quiet

! gcloud compute addresses delete psc-prediction --region={LOCATION} --quiet

Delete the bucket if needed.

In [None]:
# Recursively remove BUCKET_URI and everything stored in it.
! gsutil rm -r {BUCKET_URI}

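Optionally, you can use the following command to clean up all private endpoints, undeploying their models first, if needed. Note that it applies to every private endpoint returned by the listing, not only the one created in this notebook.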

In [None]:
# Caution: this loops over every private endpoint returned by
# PrivateEndpoint.list(), not only the one created in this notebook.
for private_endpoint in aiplatform.PrivateEndpoint.list():
    # Models must be undeployed before the endpoint itself is deleted.
    private_endpoint.undeploy_all()
    private_endpoint.delete()