In [1]:
import logging
import os
import json
import subprocess
import time
from collections import namedtuple
from pathlib import Path

import google.cloud.aiplatform as aiplatform
from google.cloud import storage

logging.getLogger().setLevel(logging.INFO)

In [2]:
APP_NAME = 'vit'
MODEL_PT_FILEPATH = 'saved_models/VisionTransformers'
MAR_MODEL_OUT_PATH = 'serve'
handler = 'predictor/handler.py'
MODEL_DISPLAY_NAME = 'ViT-model'
model_version = 1
PROJECT_ID = 'alberto-playground'
BUCKET_NAME = 'alberto-vit-playground'
CUSTOM_PREDICTOR_IMAGE_URI = f"gcr.io/{PROJECT_ID}/pytorch_predict_{APP_NAME}"

In [3]:
aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)

In [4]:
# # create directory to save model archive file
# model_output_root = MODEL_PT_FILEPATH
# mar_output_root = MAR_MODEL_OUT_PATH
# additiona_files_base_dir = 'src/model'
# export_path = f"{mar_output_root}/model-store"
# try:
#     Path(export_path).mkdir(parents=True, exist_ok=True)
# except Exception as e:
#     logging.warning(e)
#     # retry after pause
#     time.sleep(2)
#     Path(export_path).mkdir(parents=True, exist_ok=True)
#
# # parse and configure paths for model archive config
# handler_path = (
#     handler.replace("gs://", "/gcs/") + "predictor/handler.py"
#     if handler.startswith("gs://")
#     else handler
# )
# model_artifacts_dir = model_output_root
# extra_files = [
#     os.path.join(additiona_files_base_dir, f)
#     for f in os.listdir(additiona_files_base_dir)]
#
# # define model archive config
# mar_config = {
#     "MODEL_NAME": MODEL_DISPLAY_NAME,
#     "HANDLER": handler_path,
#     "SERIALIZED_FILE": f'{model_artifacts_dir}/ViT.pt',
#     "VERSION": model_version,
#     "EXTRA_FILES": ",".join(extra_files),
#     "EXPORT_PATH": export_path,
# }
#
# # generate model archive command
# archiver_cmd = (
#     "torch-model-archiver --force "
#     f"--model-name {mar_config['MODEL_NAME']} "
#     f"--serialized-file {mar_config['SERIALIZED_FILE']} "
#     f"--handler {mar_config['HANDLER']} "
#     f"--version {mar_config['VERSION']}"
# )
# if "EXPORT_PATH" in mar_config:
#     archiver_cmd += f" --export-path {mar_config['EXPORT_PATH']}"
# if "EXTRA_FILES" in mar_config:
#     archiver_cmd += f" --extra-files {mar_config['EXTRA_FILES']}"
# if "REQUIREMENTS_FILE" in mar_config:
#     archiver_cmd += f" --requirements-file {mar_config['REQUIREMENTS_FILE']}"
#
# # run archiver command
# logging.warning("Running archiver command: %s", archiver_cmd)
# with subprocess.Popen(
#         archiver_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
# ) as p:
#     _, err = p.communicate()
#     if err:
#         raise ValueError(err)




In [5]:
bucket = storage.Client().bucket(BUCKET_NAME)
blob = bucket.blob(f'{MAR_MODEL_OUT_PATH}/ViT-model.mar')
blob.upload_from_filename('serve/model-store/ViT-model.mar')

In [6]:
! docker build -f predictor/Dockerfile -t $CUSTOM_PREDICTOR_IMAGE_URI ./

Sending build context to Docker daemon  394.7MB
Step 1/21 : FROM pytorch/torchserve:latest-cpu
 ---> 68a3fcae81af
Step 2/21 : USER root
 ---> Using cache
 ---> 74b7dbf2b479
Step 3/21 : RUN apt-get update &&     apt-get install -y software-properties-common &&     add-apt-repository -y ppa:ubuntu-toolchain-r/test &&     apt-get update &&     apt-get install -y gcc-9 g++-9 apt-transport-https ca-certificates gnupg curl
 ---> Using cache
 ---> 6e360930db3d
Step 4/21 : RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" |     tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&     curl https://packages.cloud.google.com/apt/doc/apt-key.gpg |     apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - &&     apt-get update -y &&     apt-get install google-cloud-sdk -y
 ---> Using cache
 ---> bfe0359200e4
Step 5/21 : USER model-server
 ---> Using cache
 ---> 2d2bde191019
Step 6/21 : RUN python3 -m pip install --upgrade p

In [7]:
!docker push $CUSTOM_PREDICTOR_IMAGE_URI

Using default tag: latest
The push refers to repository [gcr.io/alberto-playground/pytorch_predict_vit]

[1Bcb124e95: Preparing 
[1Bc1975f18: Preparing 
[1Be1b71f7f: Preparing 
[1B7b334d17: Preparing 
[1B001bafce: Preparing 
[1Bbf18a086: Preparing 
[1B7cf25f52: Preparing 
[1Bfa8107fa: Preparing 
[1B24bd1a34: Preparing 
[1B0b544b4c: Preparing 
[1B613e1d99: Preparing 
[1Bb3c8b2c4: Preparing 
[1B0ae33361: Preparing 
[13B1975f18: Pushed   11.84MB/11.84MB[10A[2K[8A[2K[7A[2K[4A[2K[1A[2K[13A[2K[13A[2K[13A[2K[13A[2K[13A[2K[14A[2K[13A[2Klatest: digest: sha256:0c84648e648f80fd0f6dbcc4cb99b33403160df541dc9b9dff89b22df1a033e4 size: 3253


In [8]:
model_display_name = f"{APP_NAME}-v{model_version}"
model_description = "PyTorch Image classifier with custom container"

MODEL_NAME = APP_NAME
health_route = "/ping"
predict_route = f"/predictions/{MODEL_NAME}"
serving_container_ports = [7080]

In [9]:

model = aiplatform.Model.upload(
    display_name=model_display_name,
    description=model_description,
    serving_container_image_uri=CUSTOM_PREDICTOR_IMAGE_URI,
    serving_container_predict_route=predict_route,
    serving_container_health_route=health_route,
    serving_container_ports=serving_container_ports,
    artifact_uri=f'gs://{BUCKET_NAME}/{MAR_MODEL_OUT_PATH}',
)

model.wait()

print(model.display_name)
print(model.resource_name)

Creating Model


INFO:google.cloud.aiplatform.models:Creating Model


Create Model backing LRO: projects/634066980332/locations/us-central1/models/8835704028110258176/operations/8659418435475734528


INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/634066980332/locations/us-central1/models/8835704028110258176/operations/8659418435475734528


Model created. Resource name: projects/634066980332/locations/us-central1/models/8835704028110258176@1


INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/634066980332/locations/us-central1/models/8835704028110258176@1


To use this Model in another session:


INFO:google.cloud.aiplatform.models:To use this Model in another session:


model = aiplatform.Model('projects/634066980332/locations/us-central1/models/8835704028110258176@1')


INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/634066980332/locations/us-central1/models/8835704028110258176@1')


vit-v1
projects/634066980332/locations/us-central1/models/8835704028110258176


In [10]:
endpoint_display_name = f"{APP_NAME}-endpoint"
endpoint = aiplatform.Endpoint.create(display_name=endpoint_display_name)

Creating Endpoint


INFO:google.cloud.aiplatform.models:Creating Endpoint


Create Endpoint backing LRO: projects/634066980332/locations/us-central1/endpoints/1493361090891874304/operations/3205559286730063872


INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/634066980332/locations/us-central1/endpoints/1493361090891874304/operations/3205559286730063872


Endpoint created. Resource name: projects/634066980332/locations/us-central1/endpoints/1493361090891874304


INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/634066980332/locations/us-central1/endpoints/1493361090891874304


To use this Endpoint in another session:


INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:


endpoint = aiplatform.Endpoint('projects/634066980332/locations/us-central1/endpoints/1493361090891874304')


INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/634066980332/locations/us-central1/endpoints/1493361090891874304')


In [11]:
traffic_percentage = 100
machine_type = "n1-standard-4"
deployed_model_display_name = model_display_name
min_replica_count = 1
max_replica_count = 3
sync = True

model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=deployed_model_display_name,
    machine_type=machine_type,
    traffic_percentage=traffic_percentage,
    sync=sync,
)

Deploying model to Endpoint : projects/634066980332/locations/us-central1/endpoints/1493361090891874304


INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/634066980332/locations/us-central1/endpoints/1493361090891874304


Deploy Endpoint model backing LRO: projects/634066980332/locations/us-central1/endpoints/1493361090891874304/operations/9199850390760194048


INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/634066980332/locations/us-central1/endpoints/1493361090891874304/operations/9199850390760194048


Endpoint model deployed. Resource name: projects/634066980332/locations/us-central1/endpoints/1493361090891874304


INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/634066980332/locations/us-central1/endpoints/1493361090891874304


<google.cloud.aiplatform.models.Endpoint object at 0x7fa3a92c19c0> 
resource name: projects/634066980332/locations/us-central1/endpoints/1493361090891874304

In [17]:
endpoint_display_name = f"{APP_NAME}-endpoint"
filter = f'display_name="{endpoint_display_name}"'

for endpoint_info in aiplatform.Endpoint.list(filter=filter):
    print(
        f"Endpoint display name = {endpoint_info.display_name} resource id ={endpoint_info.resource_name} "
    )

endpoint = aiplatform.Endpoint(endpoint_info.resource_name)

Endpoint display name = vit-endpoint resource id =projects/634066980332/locations/us-central1/endpoints/1493361090891874304 


In [18]:
endpoint.list_models()

[id: "3764008732900458496"
 model: "projects/634066980332/locations/us-central1/models/8835704028110258176"
 display_name: "vit-v1"
 create_time {
   seconds: 1690256597
   nanos: 595458000
 }
 dedicated_resources {
   machine_spec {
     machine_type: "n1-standard-4"
   }
   min_replica_count: 1
   max_replica_count: 1
 }
 model_version_id: "1"]

In [22]:
test_images = 'gs://alberto-vit-playground/samples'

In [29]:
test_instance = [{"input": test_images}]

In [31]:
prediction = endpoint.predict(instances=test_instance)

ServiceUnavailable: 503 Model server is not available. Please retry. endpoint_id: 1493361090891874304, deployed_model_id: 3764008732900458496

In [32]:
from typing import Dict, List, Union

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """
    `instances` can be either single instance of type dict or a list
    of instances.
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # The format of each instance should conform to the deployed model's prediction input schema.
    instances = instances if type(instances) == list else [instances]
    instances = [
        json_format.ParseDict(instance_dict, Value()) for instance_dict in instances
    ]
    parameters_dict = {}
    parameters = json_format.ParseDict(parameters_dict, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    # The predictions are a google.protobuf.Value representation of the model's predictions.
    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))


# [END aiplatform_predict_custom_trained_model_sample]

In [33]:
predict_custom_trained_model_sample(
    project="634066980332",
    endpoint_id="1493361090891874304",
    location="us-central1",
    instances={ "instance_key_1": "value"}
)

ServiceUnavailable: 503 Model server is not available. Please retry. endpoint_id: 1493361090891874304, deployed_model_id: 3764008732900458496