### Install TorchServe and AI Platform.

In [1]:
!pip install torchserve torch-model-archiver torch-workflow-archiver
!pip install google-cloud-aiplatform

Collecting torchserve
  Downloading torchserve-0.7.0-py3-none-any.whl (19.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m56.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torch-model-archiver
  Downloading torch_model_archiver-0.7.0-py3-none-any.whl (14 kB)
Collecting torch-workflow-archiver
  Downloading torch_workflow_archiver-0.2.6-py3-none-any.whl (12 kB)
Collecting future
  Downloading future-0.18.3.tar.gz (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.9/840.9 kB[0m [31m74.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting enum-compat
  Downloading enum_compat-0.0.3-py3-none-any.whl (1.3 kB)
Building wheels for collected packages: future
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Created wheel for future: filename=future-0.18.3-py3-none-any.whl size=492025 sha256=0dac2c3101791f5adcc41f340d80a46ff9ab785ea7b42cbb69d6716c87c0dc

In [2]:
# Automatically restart kernel after installs
import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [1]:
!mkdir model_artifacts

mkdir: cannot create directory ‘model_artifacts’: File exists


### Create the customized handler that will be used by the TorchServe.

In [2]:
%%writefile model_artifacts/handler.py

"""Customized handler for stable diffusion 2."""
import base64
import logging
from io import BytesIO

import torch
from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline
from ts.torch_handler.base_handler import BaseHandler

logger = logging.getLogger(__name__)
model_id = 'stabilityai/stable-diffusion-2'


class ModelHandler(BaseHandler):

  def __init__(self):
    self.initialized = False
    self.map_location = None
    self.device = None
    self.use_gpu = True
    self.store_avg = True
    self.pipe = None

  def initialize(self, context):
    """Initializes the pipe."""
    properties = context.system_properties
    gpu_id = properties.get('gpu_id')

    self.map_location, self.device, self.use_gpu = \
      ('cuda', torch.device('cuda:' + str(gpu_id)),
       True) if torch.cuda.is_available() else \
        ('cpu', torch.device('cpu'), False)

    # Use the Euler scheduler here instead
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
                                                       subfolder='scheduler')
    pipe = StableDiffusionPipeline.from_pretrained(model_id,
                                                   scheduler=scheduler,
                                                   torch_dtype=torch.float16)
    pipe = pipe.to('cuda')
    # Uncomment the following line to reduce the GPU memory usage.
    # pipe.enable_attention_slicing()
    self.pipe = pipe

    self.initialized = True

  def preprocess(self, requests):
    """Noting to do here."""
    logger.info('requests: %s', requests)
    return requests

  def inference(self, preprocessed_data, *args, **kwargs):
    """Run the inference."""
    images = []
    for pd in preprocessed_data:
      prompt = pd['prompt']
      images.extend(self.pipe(prompt).images)
    return images

  def postprocess(self, output_batch):
    """Converts the images to base64 string."""
    postprocessed_data = []
    for op in output_batch:
      fp = BytesIO()
      op.save(fp, format='JPEG')
      postprocessed_data.append(base64.b64encode(fp.getvalue()).decode('utf-8'))
      fp.close()
    return postprocessed_data


Writing model_artifacts/handler.py


### Create TorchServe model archive file.

In [3]:
!torch-model-archiver \
  -f \
  --model-name model \
  --version 1.0 \
  --handler model_artifacts/handler.py \
  --export-path model_artifacts

In [4]:
!ls model_artifacts

handler.py  model.mar


In [None]:
GCS_PATH = "" # change this to a gcs path

In [None]:
!gsutil cp -r model_artifacts $GCS_PATH

In [None]:
PROJECT_ID = "yuti-test"  # <---CHANGE THIS TO YOUR PROJECT
CUSTOM_PREDICTOR_IMAGE_URI = f"us-docker.pkg.dev/vertex-ai/prediction/pytorch-gpu.1-12:latest"
print(f"CUSTOM_PREDICTOR_IMAGE_URI = {CUSTOM_PREDICTOR_IMAGE_URI}")

### Push to the Vertex AI endpoint.

In [None]:
from google.cloud import aiplatform
aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)

In [None]:
VERSION = 1
model_display_name = "stable_diffusion_2"
model_description = "stable_diffusion_2 container"

In [None]:
model = aiplatform.Model.upload(
    display_name=model_display_name,
    description=model_description,
    serving_container_image_uri=CUSTOM_PREDICTOR_IMAGE_URI,
    artifact_uri=GCS_PATH,
)

model.wait()

print(model.display_name)
print(model.resource_name)

In [None]:
endpoint_display_name = f"{APP_NAME}-endpoint"
endpoint = aiplatform.Endpoint.create(display_name=endpoint_display_name)

In [None]:
traffic_percentage = 100
machine_type = "n1-standard-4"
accelerator_type = "NVIDIA_TESLA_T4"
accelerator_count = 1
deployed_model_display_name = model_display_name
min_replica_count = 1
max_replica_count = 1
sync = True

model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=deployed_model_display_name,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    traffic_percentage=traffic_percentage,
    sync=sync,
)

### Testing

In [None]:
%%bash

cat > instances.json <<END
{
   "instances": [
     {
       "prompt": "Ironman is riding a spaceship to explore the universe."
     }
   ]
}
END

PROJECT_ID="578676399349"  # <---CHANGE THIS TO YOUR PROJECT Number
ENDPOINT_ID="7560580602169131008"  # <---CHANGE THIS TO YOUR ENDPOINT
INPUT_DATA_FILE="instances.json"

curl \
-X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/endpoints/${ENDPOINT_ID}:predict \
-d "@${INPUT_DATA_FILE}" -o img5.json


In [None]:
import base64
import json

with open('img5.json', 'r') as f:
    data = json.load(f)
    with open('img5.jpg', 'wb') as g:
        g.write(base64.b64decode(data['predictions'][0]))

In [None]:
from IPython import display
display.Image('img5.jpg')