In [None]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploying an auto-scaling model with AI Platform Prediction 

This notebook demonstrates how to deploy a pre-trained model to the AI Platform Prediction service. The notebook will show how to create a new model as well as a new model version. The model version will have auto-scaling settings turned on, so that nodes are added and removed as the load changes.

We will use a [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder-large/5) model from TensorFlow Hub. This model encodes text into sentence embeddings and has been trained on a variety of data sources.

The notebook itself is adapted from the Universal Sentence Encoder [sample notebook](https://colab.sandbox.google.com/github/tensorflow/hub/blob/master/examples/colab/semantic_similarity_with_tf_hub_universal_encoder.ipynb).

The main changes to the sample notebook are:
* Creation of AI Platform Prediction model and model version
* Update to `embed()` function to use AI Platform Prediction for inference, rather than the local model
* Streamlining of some non-essential content

## Constants

In [None]:
# User-specific settings: edit both values before running the notebook.

# Region passed to the gcloud ai-platform commands in this notebook.
REGION = "us-central1"
# Cloud Storage bucket used as the staging bucket when creating the model version.
BUCKET = "gs://<YOUR-BUCKET>"

In [None]:
# Fixed settings: the values below work as-is.

# TensorFlow Hub URL of the Universal Sentence Encoder model to download.
MODULE_URL = 'https://tfhub.dev/google/universal-sentence-encoder-large/5'
# Name of the AI Platform Prediction model resource.
MODEL_NAME = "universal_sentence_encoder"
# Local file that holds the JSON request body for prediction calls.
PREDICTIONS_FILE = "predictions.json"

## Imports

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

import datetime
import logging
import numpy as np
import seaborn as sns

## Download TensorFlow Hub Model

In [None]:
# Only let ERROR-level (and above) records through the TensorFlow logger
logging.getLogger("tensorflow").setLevel(logging.ERROR)

# Resolve the TF Hub module and keep the path that hub.resolve returns;
# later cells pass this path to gcloud as the model origin.
model = hub.resolve(MODULE_URL)

print("model file {} saved".format(model))

## Deploy AI Platform Prediction model and model version

In [None]:
# Create the AI Platform Prediction model resource named MODEL_NAME in REGION.
# The {...} placeholders are filled in by IPython from the Python variables
# before the command is handed to the shell.

!gcloud ai-platform models create '{MODEL_NAME}' \
  --region='{REGION}'

In [None]:
# Build the version name: a literal "v" followed by the current local
# timestamp formatted as month, day, year, hour, minute, second.

now = datetime.datetime.now()
MODEL_VERSION = now.strftime('v%m%d%Y%H%M%S')

In [None]:
# Write scaling parameters to config.yaml

# Note: these parameters can also be directly specified via the gcloud beta command-line
#  --metric-targets cpu-usage=80 \
#  --metric-targets gpu-duty-cycle=80 \
#  --min-nodes 2 \
#  --max-nodes 4

# Autoscaling settings: between 2 and 4 nodes, with a target of 80 for both
# the CPU_USAGE and GPU_DUTY_CYCLE metrics.
CONFIG = '''
autoScaling:
  minNodes: 2
  maxNodes: 4  
  metrics:
    - name: CPU_USAGE
      target: 80  
    - name: GPU_DUTY_CYCLE
      target: 80
'''

# Write the file from Python instead of `!echo '{CONFIG}' > config.yaml`:
# the result no longer depends on how the shell quotes a multi-line string,
# and a single quote inside CONFIG cannot break the command.
with open('config.yaml', 'w') as config_file:
    config_file.write(CONFIG)

In [None]:
# Create a new model version. This may take several minutes.
#
# The version MODEL_VERSION is created under MODEL_NAME in REGION. Its origin
# is the local path held in `model` (returned by hub.resolve), and BUCKET is
# passed as the staging bucket. The autoscaling settings are read from the
# config.yaml file written in an earlier cell. The {...} placeholders are
# filled in by IPython from the Python variables before the shell runs.

!gcloud ai-platform versions create {MODEL_VERSION} \
  --model={MODEL_NAME} \
  --region={REGION} \
  --origin={model} \
  --staging-bucket={BUCKET} \
  --runtime-version=2.2 \
  --framework='TENSORFLOW' \
  --python-version=3.7 \
  --machine-type=n1-standard-4 \
  --accelerator count=1,type=nvidia-tesla-t4 \
  --config=config.yaml

## Use service to make predictions

In [None]:
import json

def embed(input):
    # More info on how to format your input strings:
    # https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects/predict
    prediction_json = {'instances': input}
    
    # Export predictions to JSON file
    with open(PREDICTIONS_FILE, 'w') as outfile:
        json.dump(prediction_json, outfile)    
        
    # Make predictions
    preds = !gcloud ai-platform predict --model {MODEL_NAME} --json-request={PREDICTIONS_FILE} --region={REGION}
    
    # Convert JSON response into Python object
    preds.pop(0) # Remove warning
    preds = "\n".join(preds) # Concatenate list of strings into one string
    preds = json.loads(preds) # Convert JSON string into Python dict
    
    return preds

In [None]:
# Helper functions for plotting

def plot_similarity(labels, features, rotation):
  """Draw a heatmap of the pairwise inner products of `features`.

  Args:
    labels: tick labels used on both axes.
    features: feature vectors; the inner product of this collection with
      itself is plotted (presumably one vector per label -- confirm with
      the caller).
    rotation: rotation applied to the x-axis tick labels.
  """
  similarity = np.inner(features, features)
  sns.set(font_scale=1.2)
  # Same labels on both axes; colour scale fixed to the [0, 1] range.
  heatmap_options = dict(
      xticklabels=labels,
      yticklabels=labels,
      vmin=0,
      vmax=1,
      cmap="YlOrRd",
  )
  axes = sns.heatmap(similarity, **heatmap_options)
  axes.set_xticklabels(labels, rotation=rotation)
  axes.set_title("Semantic Textual Similarity")

def run_and_plot(messages_):
  """Embed `messages_` with embed() and plot their similarity heatmap.

  The messages themselves are used as tick labels, with the x-axis labels
  rotated by 90.
  """
  plot_similarity(labels=messages_, features=embed(messages_), rotation=90)

In [None]:
# Plot the textual similarity between various messages

# Sentences grouped by topic; the flattened list keeps this order.
message_groups = [
    ("Smartphones", [
        "I like my phone",
        "My phone is not good.",
        "Your cellphone looks great.",
    ]),
    ("Weather", [
        "Will it snow tomorrow?",
        "Recently a lot of hurricanes have hit the US",
        "Global warming is real",
    ]),
    ("Food and health", [
        "An apple a day, keeps the doctors away",
        "Eating strawberries is healthy",
        "Is paleo better than keto?",
    ]),
    ("Asking about age", [
        "How old are you?",
        "what is your age?",
    ]),
]

messages = [
    sentence
    for _topic, sentences in message_groups
    for sentence in sentences
]

run_and_plot(messages)

## Cleanup

In [None]:
# Delete model version resource. The version created in this notebook is
# removed first, then the model it belongs to.
# NOTE(review): --quiet presumably skips gcloud's confirmation prompt -- confirm in the gcloud docs.
!gcloud ai-platform versions delete {MODEL_VERSION} --model {MODEL_NAME} --region {REGION} --quiet 

# Delete model resource
!gcloud ai-platform models delete {MODEL_NAME} --region {REGION} --quiet