In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Run and Deploy Stable Diffusion 2.0 model in Vertex AI

### Install TorchServe and the Vertex AI SDK.

In [2]:
%%writefile requirements.txt
torchserve
torch-model-archiver
torch-workflow-archiver
google-cloud-aiplatform

Writing requirements.txt


In [None]:
# Install the packages listed in requirements.txt, then restart the kernel so
# the freshly installed packages are picked up.
import os
import subprocess
import sys

# sys.executable makes pip target the interpreter this kernel runs on.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "--quiet", "-r", "requirements.txt"]
)

if not os.getenv("IS_TESTING"):
    # The restart is skipped when IS_TESTING is set.
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [None]:
# -p keeps this cell re-runnable: no error when the directory already exists.
!mkdir -p model_artifacts

### Create the custom handler that TorchServe will use.

In [None]:
%%writefile model_artifacts/handler.py

"""Customized handler for stable diffusion 2."""
import base64
import logging
from io import BytesIO

import torch
from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline
from ts.torch_handler.base_handler import BaseHandler

logger = logging.getLogger(__name__)
model_id = 'stabilityai/stable-diffusion-2'


class ModelHandler(BaseHandler):
  """TorchServe handler that turns text prompts into images.

  The pipeline named by the module-level `model_id` is loaded once in
  `initialize`. Each request item is read through its 'prompt' key, and the
  response contains one base64-encoded JPEG string per generated image.
  """

  def __init__(self):
    # Let the base class set up its own attributes before adding ours.
    super().__init__()
    self.initialized = False
    self.map_location = None
    self.device = None
    self.use_gpu = True
    self.store_avg = True
    self.pipe = None

  def initialize(self, context):
    """Loads the pipeline and moves it to the device chosen for this worker.

    Args:
      context: TorchServe context; `context.system_properties` is read for
        the optional 'gpu_id' entry.
    """
    properties = context.system_properties
    gpu_id = properties.get('gpu_id')

    if torch.cuda.is_available():
      self.map_location = 'cuda'
      # Without a gpu_id, use the default CUDA device instead of building
      # the invalid device string 'cuda:None'.
      self.device = torch.device(
          'cuda' if gpu_id is None else 'cuda:' + str(gpu_id))
      self.use_gpu = True
    else:
      self.map_location = 'cpu'
      self.device = torch.device('cpu')
      self.use_gpu = False

    # Half precision is kept for GPU; CPU falls back to full precision.
    dtype = torch.float16 if self.use_gpu else torch.float32

    # Use the Euler scheduler here instead
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
                                                       subfolder='scheduler')
    pipe = StableDiffusionPipeline.from_pretrained(model_id,
                                                   scheduler=scheduler,
                                                   torch_dtype=dtype)
    # Honour the device selected above rather than a hard-coded 'cuda'.
    pipe = pipe.to(self.device)
    # Uncomment the following line to reduce the GPU memory usage.
    # pipe.enable_attention_slicing()
    self.pipe = pipe

    self.initialized = True

  def preprocess(self, requests):
    """Logs the incoming requests and returns them unchanged."""
    logger.info('requests: %s', requests)
    return requests

  def inference(self, preprocessed_data, *args, **kwargs):
    """Runs the pipeline once per request.

    Args:
      preprocessed_data: iterable of request items, each indexable by
        'prompt'.

    Returns:
      A flat list with the images produced for every prompt, in request
      order.
    """
    images = []
    for request in preprocessed_data:
      images.extend(self.pipe(request['prompt']).images)
    return images

  def postprocess(self, output_batch):
    """Encodes each image as a base64 JPEG string.

    Args:
      output_batch: iterable of images exposing `save(fp, format=...)`.

    Returns:
      A list of UTF-8 strings, one per image, in the same order.
    """
    postprocessed_data = []
    for image in output_batch:
      # The context manager releases the buffer even if saving fails.
      with BytesIO() as buffer:
        image.save(buffer, format='JPEG')
        postprocessed_data.append(
            base64.b64encode(buffer.getvalue()).decode('utf-8'))
    return postprocessed_data


### Create TorchServe model archive file.

In [None]:
# Package the custom handler into an archive named after --model-name and
# write it to model_artifacts; -f overwrites an archive left by a previous run.
!torch-model-archiver \
  -f \
  --model-name stable_diffusion_2 \
  --version 1.0 \
  --handler model_artifacts/handler.py \
  --export-path model_artifacts

### Create the TorchServe config file.

In [None]:
%%writefile model_artifacts/config.properties

# JSON envelope: requests in this notebook are sent as {"instances": [...]}
# and responses are read from the "predictions" key.
service_envelope=json
# Inference API; 7080 is the port the container exposes for predictions.
inference_address=http://0.0.0.0:7080
# Management API.
management_address=http://0.0.0.0:7081

### Optional: test TorchServe locally (do not run now).

In [None]:
# !torchserve \
#   --start \
#   --ts-config model_artifacts/config.properties \
#   --model-store model_artifacts \
#   --models stable_diffusion_2.mar

### Create the Dockerfile.

In [None]:
%%writefile model_artifacts/Dockerfile

# TorchServe base image (GPU tag).
FROM pytorch/torchserve:latest-gpu

# Install the handler's Python dependencies in a single layer.
# --no-cache-dir keeps pip's download cache out of the image, and
# `python3 -m pip` is used for both steps so they share one interpreter.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        diffusers transformers accelerate scipy safetensors

# Run the server as the model-server user.
USER model-server

# Copy the model archive and the TorchServe config into the model store.
COPY ./stable_diffusion_2.mar /home/model-server/
COPY ./config.properties /home/model-server/

# Expose the inference (7080) and management (7081) ports set in
# config.properties.
EXPOSE 7080
EXPOSE 7081

# Start TorchServe with the copied config and load the archived model.
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "stable_diffusion_2.mar", \
     "--model-store", \
     "/home/model-server"]


In [None]:
# Application name; it is part of the image URI and is reused later for the
# Vertex AI display names and the predict route.
APP_NAME = "stable_diffusion_2"

# Google Cloud settings -- edit both values before running.
PROJECT_ID = "yuti-test"  # <---CHANGE THIS TO YOUR PROJECT
BUCKET_NAME = "gs://yuti-test-stable-diffusion"  # <---CHANGE THIS TO YOUR BUCKET

# Image URI used by the docker build/run/push cells and the model upload.
CUSTOM_PREDICTOR_IMAGE_URI = f"gcr.io/{PROJECT_ID}/pytorch_predict_{APP_NAME}"
print(f"CUSTOM_PREDICTOR_IMAGE_URI = {CUSTOM_PREDICTOR_IMAGE_URI}")

### Build the Docker image.

In [None]:
# Build the serving image from the Dockerfile in ./model_artifacts and tag it
# with CUSTOM_PREDICTOR_IMAGE_URI.
!docker build \
  --tag=$CUSTOM_PREDICTOR_IMAGE_URI \
  ./model_artifacts

### Optional: Test the Docker container locally.

In [None]:
# Start the container detached with all GPUs and publish port 7080;
# --rm removes the container once it stops.
!docker run -t -d --rm -p 7080:7080 --name=stable_diffusion_2 --gpus all $CUSTOM_PREDICTOR_IMAGE_URI
# Fixed two-minute pause before the following cells talk to the container.
!sleep 120

In [None]:
# List all containers, including stopped ones, to check the container's state.
!docker ps -a

### Send a prediction request to the local container with curl.

In [None]:
# POST a single prompt to the local predictions route and save the JSON
# response to img4.json.
!curl -X POST \
  -d '{"instances": [{"prompt": "plane"}] }' \
  -H "Content-Type: application/json" \
  http://localhost:7080/predictions/stable_diffusion_2 \
  -o img4.json

### Decode the JSON response into a JPEG file.

In [None]:
import base64
import json

# Load the saved prediction response.
with open('img4.json', 'r') as f:
    data = json.load(f)

# Validate before opening the output file, so a failed request neither leaves
# an empty img4.jpg behind nor ends in a bare KeyError.
predictions = data.get('predictions') if isinstance(data, dict) else None
if not predictions:
    raise RuntimeError(f'img4.json contains no predictions: {data!r}')

# The first prediction is a base64-encoded JPEG; decode it to disk.
with open('img4.jpg', 'wb') as g:
    g.write(base64.b64decode(predictions[0]))

In [None]:
# Import only Image: binding the name `display` to the IPython.display module
# would shadow the notebook's built-in display() function for later cells.
from IPython.display import Image

# The last expression of the cell renders the decoded JPEG inline.
Image('img4.jpg')

### Push the image and deploy it to a Vertex AI endpoint.

In [None]:
# Push the locally built image to the registry path in CUSTOM_PREDICTOR_IMAGE_URI.
!docker push $CUSTOM_PREDICTOR_IMAGE_URI

In [None]:
from google.cloud import aiplatform

# Point the SDK at the project and staging bucket configured earlier.
aiplatform.init(
    project=PROJECT_ID,
    staging_bucket=BUCKET_NAME,
)

In [None]:
# Naming for the uploaded model.
VERSION = 1
MODEL_NAME = APP_NAME
model_description = "stable_diffusion_2 container"
model_display_name = f"{APP_NAME}-v{VERSION}"

# Serving container settings: port plus the health and prediction routes.
serving_container_ports = [7080]
health_route = "/ping"
predict_route = f"/predictions/{MODEL_NAME}"

In [None]:
# Register the serving image as a Vertex AI model, passing the container's
# port and routes along with the display metadata.
model = aiplatform.Model.upload(
    serving_container_image_uri=CUSTOM_PREDICTOR_IMAGE_URI,
    serving_container_ports=serving_container_ports,
    serving_container_health_route=health_route,
    serving_container_predict_route=predict_route,
    display_name=model_display_name,
    description=model_description,
)

# Wait for the upload to complete before reading the model's attributes.
model.wait()

print(model.display_name, model.resource_name, sep="\n")

In [None]:
# Create the endpoint that the deploy cell attaches the uploaded model to.
endpoint_display_name = f"{APP_NAME}-endpoint"
endpoint = aiplatform.Endpoint.create(display_name=endpoint_display_name)

In [None]:
# Deployment settings.
traffic_percentage = 100
machine_type = "n1-standard-4"
accelerator_type = "NVIDIA_TESLA_T4"
accelerator_count = 1
deployed_model_display_name = model_display_name
min_replica_count = 1
max_replica_count = 1
sync = True

# Deploy the uploaded model to the endpoint. The replica counts are passed
# explicitly so that editing the variables above actually takes effect.
model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=deployed_model_display_name,
    machine_type=machine_type,
    min_replica_count=min_replica_count,
    max_replica_count=max_replica_count,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    traffic_percentage=traffic_percentage,
    sync=sync,
)

### Testing

In [None]:
%%bash

# Write the request body: a single instance carrying the text prompt.
cat > instances.json <<END
{
   "instances": [
     {
       "prompt": "Ironman is riding a spaceship to explore the universe."
     }
   ]
}
END

# Identify the deployed endpoint; replace both IDs with your own values.
PROJECT_ID="578676399349"  # <---CHANGE THIS TO YOUR PROJECT Number
ENDPOINT_ID="7560580602169131008"  # <---CHANGE THIS TO YOUR ENDPOINT
INPUT_DATA_FILE="instances.json"

# Call the endpoint's :predict method in us-central1, authenticating with a
# gcloud access token, and save the JSON response to img5.json.
curl \
-X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/endpoints/${ENDPOINT_ID}:predict \
-d "@${INPUT_DATA_FILE}" -o img5.json


In [None]:
import base64
import json

# Load the saved prediction response.
with open('img5.json', 'r') as f:
    data = json.load(f)

# Validate before opening the output file, so a failed request neither leaves
# an empty img5.jpg behind nor ends in a bare KeyError.
predictions = data.get('predictions') if isinstance(data, dict) else None
if not predictions:
    raise RuntimeError(f'img5.json contains no predictions: {data!r}')

# The first prediction is a base64-encoded JPEG; decode it to disk.
with open('img5.jpg', 'wb') as g:
    g.write(base64.b64decode(predictions[0]))

In [None]:
# Import only Image: binding the name `display` to the IPython.display module
# would shadow the notebook's built-in display() function for later cells.
from IPython.display import Image

# The last expression of the cell renders the decoded JPEG inline.
Image('img5.jpg')