<a href="https://colab.research.google.com/github/Timure228/Hands-on-ML/blob/main/chapter_19_training_deploying_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## The code below is not to be executed

In [None]:
from pathlib import Path
import tensorflow as tf

# Save the model to `path` using the "tf" save format.
# NOTE(review): `model` and `path` are not defined in this notebook; they are
# placeholders to be supplied by the reader.

model.save(path, save_format="tf")

In [None]:
# Convert input images from NumPy array to Python list
import json

# Pretend the first three test images are new digit images to classify.
X_new = X_test[:3]

# Assemble the request payload first, then serialize it to a JSON string.
# The images are converted to nested Python lists so that json can encode them.
request_payload = {
    "signature_name": "serving_default",
    "instances": X_new.tolist(),
}
request_json = json.dumps(request_payload)

## gRPC API

In [None]:
from tensorflow_serving.apis.predict_pb2 import PredictRequest

# Build a PredictRequest addressed to `model_name`, default serving signature.
request = PredictRequest()
request.model_spec.name = model_name
request.model_spec.signature_name = "serving_default"

# The request input is keyed by the model's first input name
# (== "flatten_input").
input_name = model.input_names[0]
input_proto = tf.make_tensor_proto(X_new)
request.inputs[input_name].CopyFrom(input_proto)

In [None]:
# grpcio library
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc

# Open an insecure (unencrypted) gRPC channel to localhost:8500.
channel = grpc.insecure_channel('localhost:8500')
# Client stub of the prediction service, bound to that channel.
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
# Send the PredictRequest; `timeout=10.0` bounds how long the call may take.
response = predict_service.Predict(request, timeout=10.0)

In [None]:
# Create a new model version and export it as a SavedModel to the directory
# <model_name>/<model_version>.
model = [ ... ] # placeholder: build and train the new model version here

model_version = "0002"
model_path = Path(model_name) / model_version
model.save(model_path, save_format="tf")

In [None]:
# Google Cloud Storage bucket to store saved models
from google.cloud import storage

project_id = "my_project" # change this to your project ID
bucket_name = "my_bucket" # change this to a unique bucket name
location = "us-central1"

# Storage client bound to the project, then used to create the bucket in
# `location`.
storage_client = storage.Client(project=project_id)
bucket = storage_client.create_bucket(bucket_name, location=location)

In [None]:
def upload_directory(bucket, dirpath):
  """Upload every file found under `dirpath` (recursively) to `bucket`.

  Each blob is named after the file's path relative to the *parent* of
  `dirpath`, using forward slashes, so the directory's own name is kept as
  the blob prefix (e.g. "my_dir/sub/file.bin").

  Args:
    bucket: object exposing `blob(name)`; the returned blob must expose
      `upload_from_filename(path)`.
    dirpath: directory to upload, as a `str` or `Path`.
  """
  dirpath = Path(dirpath)
  for filepath in dirpath.glob("**/*"):
    if filepath.is_file():
      # as_posix must be *called*: passing the bound method itself would hand
      # the bucket a method object instead of the blob name string.
      blob_name = filepath.relative_to(dirpath.parent).as_posix()
      blob = bucket.blob(blob_name)
      blob.upload_from_filename(filepath)

# Upload the local "my_mnist_model" directory tree to the bucket.
upload_directory(bucket, "my_mnist_model")

In [None]:
from google.cloud import aiplatform

server_image = ... # placeholder: set this to the serving container image URI

# Point the Vertex AI SDK at the project and location chosen earlier.
aiplatform.init(project=project_id, location=location)
# Register the model artifacts stored under
# gs://<bucket_name>/my_mnist_model/0001, to be served with `server_image`.
mnist_model = aiplatform.Model.upload(
    display_name="mnist",
    artifact_uri=f"gs://{bucket_name}/my_mnist_model/0001",
    serving_container_image_uri=server_image
)

In [None]:
# Deploy the model: create an endpoint, then deploy `mnist_model` to it.
endpoint = aiplatform.Endpoint.create(display_name="mnist_endpoint")

# Between 1 and 5 replicas, each on an n1-standard-4 machine with one
# NVIDIA_TESLA_K80 accelerator.
endpoint.deploy(
    mnist_model,
    min_replica_count=1,
    max_replica_count=5,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=1
)

In [None]:
# Query the deployed endpoint with the new images, converted to nested lists.
response = endpoint.predict(instances=X_new.tolist())

In [None]:
import numpy as np
# Show the returned predictions rounded to 2 decimals.
np.round(response.predictions, 2)

In [None]:
# Clean up: undeploy first, then delete the endpoint itself.
endpoint.undeploy_all() # undeploy all models from the endpoint
endpoint.delete()

### Batch Prediction Jobs on Vertex AI

In [None]:
# Write the first 100 test images to a JSON Lines file (one JSON-encoded image
# per line), then upload the containing directory to the bucket.
batch_path = Path("my_mnist_batch")
batch_path.mkdir(exist_ok=True)
# The file name must end in ".jsonl": the batch prediction job reads
# gs://<bucket>/my_mnist_batch/my_mnist_batch.jsonl.
with open(batch_path / "my_mnist_batch.jsonl", "w") as jsonl_file: # jsonl stands for JSON Lines
  for image in X_test[:100].tolist():
    jsonl_file.write(json.dumps(image))
    jsonl_file.write("\n")

upload_directory(bucket, batch_path)

In [None]:
from google.cloud.aiplatform_v1.types import batch_prediction_job

# Prediction job: run batch predictions with `mnist_model` over the uploaded
# JSON Lines file and write the results under my_mnist_predictions/.
# NOTE: this assignment rebinds the name `batch_prediction_job`, which shadows
# the module of the same name imported just above.
batch_prediction_job = mnist_model.batch_predict(
    job_display_name="my_batch_prediction_job",
    machine_type="n1-standard-4",
    starting_replica_count=1,
    max_replica_count=5,
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=1,
    gcs_source=[f"gs://{bucket_name}/{batch_path.name}/my_mnist_batch.jsonl"],
    gcs_destination_prefix=f"gs://{bucket_name}/my_mnist_predictions/",
    sync=True # set to False if you don't want to wait for completion
)

In [None]:
# Gather the "prediction" entry of every line of the job's result files.
y_probas = []
for blob in batch_prediction_job.iter_outputs():
  # Only blobs whose name contains "prediction.results" are read.
  if "prediction.results" not in blob.name:
    continue
  result_lines = blob.download_as_text().splitlines()
  y_probas.extend(json.loads(line)["prediction"] for line in result_lines)

In [None]:
# Check the accuracy: take the most probable class of each prediction and
# compare it with the first 100 test labels (the batch held 100 images).
y_pred = np.argmax(y_probas, axis=1)
accuracy = np.sum(y_pred == y_test[:100]) / 100

In [None]:
# Delete the uploaded model, batch and prediction blobs, then the bucket and
# the batch prediction job.
for prefix in ["my_mnist_model/", "my_mnist_batch/", "my_mnist_predictions/"]:
  for blob in bucket.list_blobs(prefix=prefix):
    blob.delete()

bucket.delete() # if the bucket is empty
batch_prediction_job.delete()

## Deploying the model to Mobile

In [None]:
# Convert the SavedModel stored at `model_path` to a FlatBuffer and write the
# resulting bytes to a .tflite file.
saved_model_dir = str(model_path)
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model = converter.convert()
with open("my_converted_savedmodel.tflite", "wb") as tflite_file:
  tflite_file.write(tflite_model)

## Using GPUs

In [None]:
# Check if TF sees your GPUs
import tensorflow as tf

# List the physical GPU devices TensorFlow can see and display the list
# (it is empty when no GPU is visible).
physical_gpus = tf.config.list_physical_devices("GPU")
physical_gpus

[]

In [None]:
# Limit how much memory TensorFlow may use on each GPU by configuring a single
# logical device per physical GPU with memory_limit=2048.
# NOTE(review): the limit is presumably expressed in MB — confirm in the TF docs.
for gpu in physical_gpus:
  tf.config.set_logical_device_configuration(
      gpu,
      [tf.config.LogicalDeviceConfiguration(memory_limit=2048)]
  )

In [None]:
# Tell TF to grab GPU memory only when it needs it, by enabling memory growth
# on every visible GPU.
for gpu in physical_gpus:
  tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Split one GPU into two logical devices (memory_limit=2048 each).
# Guarded because `physical_gpus` is empty when no GPU is visible, in which
# case indexing [0] would raise IndexError.
if physical_gpus:
  tf.config.set_logical_device_configuration(
      physical_gpus[0],
      [tf.config.LogicalDeviceConfiguration(memory_limit=2048),
       tf.config.LogicalDeviceConfiguration(memory_limit=2048)]
  )
else:
  print("No GPU detected: skipping the logical device split.")

In [None]:
# Create a variable without specifying a device and inspect its placement.
a = tf.Variable([1., 2., 3.])
a.device # .device tells you which device the variable was placed on

'/job:localhost/replica:0/task:0/device:CPU:0'

In [None]:
# Place an operation on a specific device by creating it inside a tf.device
# context (here the first GPU is requested).
with tf.device("/gpu:0"):
  c = tf.Variable([1., 2., 3.])

c.device # no GPU on this machine, so the variable ends up on the CPU anyway

'/job:localhost/replica:0/task:0/device:CPU:0'

# Training at Scale Using the Distribution Strategies API

In [None]:
# Create the distribution strategy first: the model must be built and compiled
# inside its scope.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
  model = tf.keras.Sequential([...]) # Create a Keras model normally
  model.compile([...]) # compile the model normally

batch_size = 100 # preferably divisible by the number of replicas
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid), batch_size=batch_size)

In [None]:
# Or load a previously saved model; the load also happens inside the strategy
# scope.
with strategy.scope():
  model = tf.keras.models.load_model("my_mirrored_model")

In [None]:
# Restrict the strategy to a subset of GPUs by listing the devices explicitly.
strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])

In [None]:
# Use data parallelism through CentralStorageStrategy instead.
# NOTE(review): presumably this keeps the parameters in one central place
# rather than mirroring them on every device — confirm in the TF docs.
strategy = tf.distribute.experimental.CentralStorageStrategy()

In [None]:
# Train a model on a cluster
import tempfile
import tensorflow as tf

# The strategy is created first, before anything else in the script.
strategy = tf.distribute.MultiWorkerMirroredStrategy() # at the start
# The resolver exposes this task's type and id.
resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
print(f"Starting task {resolver.task_type} #{resolver.task_id}")
[...] # load and split a dataset

with strategy.scope():
  model = tf.keras.Sequential([...]) # build the Keras model
  model.compile([...]) # compile the model

model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=10)

# Every task calls model.save(); only task #0 writes to the final location.
# NOTE(review): this checks task_id only, not task_type — confirm that no other
# task type in the cluster also has id 0.
if resolver.task_id == 0: # the chief saves the model to the right location
  model.save("my_mnist_multiworker_model", save_format="tf")
else:
  tmpdir = tempfile.mkdtemp() # other workers save to a temporary directory
  model.save(tmpdir, save_format="tf")
  tf.io.gfile.rmtree(tmpdir) # and we can delete this directory at the end

In [None]:
# Create the multi-worker strategy with communication options that select the
# NCCL implementation for collective communication.
strategy = tf.distribute.MultiWorkerMirroredStrategy(
    communication_options=tf.distribute.experimental.CommunicationOptions(
        implementation=tf.distribute.experimental.CollectiveCommunication.NCCL))

In [None]:
# Create a TPU strategy: resolve the TPU cluster, initialize the TPU system,
# then build the strategy from the resolver.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

## Running Large Training Jobs on Vertex AI

In [None]:
import os
[...] # other imports, create MultiWorkerMirroredStrategy, and resolver

# Choose the output locations: the chief uses the directories Vertex AI
# provides through environment variables, every other task writes to
# throwaway temporary directories.
if resolver.task_type == "chief":
  model_dir = os.getenv("AIP_MODEL_DIR") # paths provided by Vertex AI
  tensorboard_log_dir = os.getenv("AIP_TENSORBOARD_LOG_DIR")
  checkpoint_dir = os.getenv("AIP_CHECKPOINT_DIR")
else:
  tmp_dir = Path(tempfile.mkdtemp()) # other workers use temporary dirs
  model_dir = tmp_dir / "model"
  tensorboard_log_dir = tmp_dir / "logs"
  checkpoint_dir = tmp_dir / "ckpt"

callbacks = [tf.keras.callbacks.TensorBoard(tensorboard_log_dir),
             tf.keras.callbacks.ModelCheckpoint(checkpoint_dir)]
[...] # build and compile using the strategy scope, just like earlier
# The keyword is `epochs`; a misspelled keyword makes fit() raise a TypeError.
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=10,
          callbacks=callbacks)
model.save(model_dir, save_format="tf")

In [None]:
# Create a custom training job on Vertex AI that runs the local training
# script in the given training container, and serves the resulting model with
# `server_image`.
custom_training_job = aiplatform.CustomTrainingJob(
    display_name="my_custom_training_job",
    script_path="my_vertex_ai_training_task.py",
    container_uri="gcr.io/cloud-aiplatform/training/tf-gpu.2-4:latest",
    model_serving_container_image_uri=server_image,
    requirements=["gcsfs==2022.3.0"], # not needed, this is just an example
    staging_bucket=f"gs://{bucket_name}/staging"
)

In [None]:
# Run it on 2 workers (replica_count=2), each with 2 GPUs
# (accelerator_count=2); the result of run() is kept in `mnist_model2`.
mnist_model2 = custom_training_job.run(
    machine_type="n1-standard-4",
    replica_count=2,
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=2
)

## Hyperparameter Tuning on Vertex AI

In [None]:
import argparse

# Command-line interface of the trial script: each hyperparameter is an
# optional flag with a default value.
parser = argparse.ArgumentParser()
parser.add_argument("--n_hidden", type=int, default=2)
parser.add_argument("--n_neurons", type=int, default=256)
parser.add_argument("--learning_rate", type=float, default=1e-2)
parser.add_argument("--optimizer", default="adam")
# Parses the arguments of the current process (sys.argv).
args = parser.parse_args()

In [None]:
# Train the model
import tensorflow as tf

def build_model(args):
  """Build and compile the classifier described by the parsed arguments.

  The model is created and compiled inside a `tf.distribute.MirroredStrategy`
  scope: a Flatten layer for 28x28 inputs, `args.n_hidden` ReLU Dense layers
  of `args.n_neurons` units each, and a 10-unit softmax output layer.

  Args:
    args: object providing `n_hidden`, `n_neurons`, `optimizer` and
      `learning_rate` attributes.

  Returns:
    The compiled `tf.keras.Sequential` model.
  """
  strategy = tf.distribute.MirroredStrategy()
  with strategy.scope():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=[28, 28], dtype=tf.uint8))
    for _layer_index in range(args.n_hidden):
      hidden_layer = tf.keras.layers.Dense(args.n_neurons, activation="relu")
      model.add(hidden_layer)
    model.add(tf.keras.layers.Dense(10, activation="softmax"))

    # Resolve the optimizer by name, then override its learning rate.
    optimizer = tf.keras.optimizers.get(args.optimizer)
    optimizer.learning_rate = args.learning_rate
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )

  return model

[...] # load dataset
# Build the model from the parsed hyperparameters and train it; `history`
# keeps the value returned by fit().
model = build_model(args)
history = model.fit([...])

In [None]:
# Report model's performance back to Vertex AI
import hypertune

# NOTE: this rebinds the name `hypertune` from the imported module to a
# HyperTune instance.
hypertune = hypertune.HyperTune()
# Report the best validation accuracy seen during training, together with the
# optimizer's iteration count as the global step.
hypertune.report_hyperparameter_tuning_metric(
    hyperparameter_metric_tag="accuracy", # name of the reported metric
    metric_value=max(history.history["val_accuracy"]), # metric value
    global_step=model.optimizer.iterations.numpy()
  )

In [None]:
# Define the custom job that runs one trial from the local training script.
trial_job = aiplatform.CustomJob.from_local_script(
    display_name="my_search_trial_job",
    script_path="my_vertex_ai_trial.py", # path to your training script
    container_uri="gcr.io/cloud-aiplatform/training/tf-gpu.2-4:latest",
    staging_bucket=f"gs://{bucket_name}/staging",
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=2, # in this example, each trial will have 2 GPUs
)

In [None]:
# Run hyperparameter tuning job
from google.cloud.aiplatform import hyperparameter_tuning as hpt

# Search the hyperparameters exposed by the trial script's command-line flags,
# maximizing the "accuracy" metric that the trial script reports.
hp_job = aiplatform.HyperparameterTuningJob(
    display_name="my_hp_search_job",
    custom_job=trial_job,
    metric_spec={"accuracy": "maximize"},
    parameter_spec={
        "learning_rate": hpt.DoubleParameterSpec(min=1e-3, max=10, scale="log"),
        # Integer specs: the trial script parses these two flags with type=int,
        # so fractional values would be rejected.
        "n_neurons": hpt.IntegerParameterSpec(min=1, max=300, scale="linear"),
        "n_hidden": hpt.IntegerParameterSpec(min=1, max=10, scale="linear"),
        # Values are passed to tf.keras.optimizers.get(), so they must be
        # valid optimizer names.
        "optimizer": hpt.CategoricalParameterSpec(["sgd", "adam"])
    },
    max_trial_count=100,
    parallel_trial_count=20
)

hp_job.run()

In [None]:
# Find best trial

def get_final_metric(trial, metric_id):
  """Return the value of the trial's final metric named `metric_id`.

  Scans `trial.final_measurement.metrics` in order and returns the `value` of
  the first entry whose `metric_id` matches.

  Returns:
    The matching metric value, or None when no entry matches.
  """
  matching_values = (
      entry.value
      for entry in trial.final_measurement.metrics
      if entry.metric_id == metric_id
  )
  return next(matching_values, None)

trials = hp_job.trials
# Final "accuracy" of each trial, in the same order as `trials`.
trial_accuracies = [get_final_metric(trial, "accuracy") for trial in trials]
# np.argmax gives the position of the highest accuracy in that list.
best_trial = trials[np.argmax(trial_accuracies)]

# Cell output: the best accuracy and the id of the trial that reached it.
max(trial_accuracies), best_trial.id