In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# E2E ML on GCP: MLOps stage 4 : formalization: get started with Vertex AI Model Evaluation

<table align="left">
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage4/get_started_with_custom_model_evaluation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/ai/platform/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage4/get_started_with_custom_model_evaluation.ipynb">
      Open in Google Cloud Notebooks
    </a>
  </td>
</table>
<br/><br/><br/>

## Overview


This tutorial demonstrates how to use Vertex AI for E2E MLOps on Google Cloud in production. This tutorial covers stage 4 : formalization: get started with Vertex AI Model Evaluation.

### Dataset

The dataset used for the AutoML portion of this tutorial is the [Bank Marketing dataset](https://console.cloud.google.com/storage/browser/_details/cloud-ml-tables-data/bank-marketing.csv). This dataset does not require any feature engineering. The version of the dataset you will use in this tutorial is stored in a public Cloud Storage bucket.

### Dataset

The dataset used for the BigQuery ML portion of this tutorial is the Penguins dataset from [BigQuery public datasets](https://cloud.google.com/bigquery/public-data). The model trained on this version of the dataset predicts the penguin species.

### Dataset

The dataset used for the custom model portion of this tutorial is the [Boston Housing Prices dataset](https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html). The version of the dataset you will use in this tutorial is built into TensorFlow. The trained model predicts the median price of a house in units of 1K USD.

### Objective

In this tutorial, you learn how to use `Vertex AI Model Evaluation`.

This tutorial uses the following Google Cloud ML services:

- `Vertex AI AutoML`
- `Vertex AI Training`
- `Vertex AI Model Evaluation`

The steps performed include:

- Evaluate an AutoML model.
    - Retrieve the default evaluation metrics from training.
    - Evaluate using a custom evaluation slice.
- Evaluate a BigQuery ML model.
    - Retrieve the default evaluation metrics from training.
    - Export the BigQuery ML model as a TF SavedModel
    - Import the exported model as a Vertex AI Model resource.
    - Evaluate using a custom evaluation slice.
- Evaluate a custom model.
    - Retrieve the evaluation metrics from training.
    - Evaluate using a custom evaluation slice.
- A/B Testing
    - *TODO: describe the A/B testing steps.*
- Sandbox Testing
    - *TODO: describe the sandbox testing steps.*

## Installations

Install *one time* the packages for executing the MLOps notebooks.

In [None]:
# One-time dependency installation. Set ONCE_ONLY to True for the first run,
# then back to False so re-executing the notebook does not reinstall packages.
# NOTE(review): USER_FLAG is not assigned anywhere in the visible part of this
# notebook; confirm it is defined (or intentionally empty) before enabling.
ONCE_ONLY = False
if ONCE_ONLY:
    # TensorFlow-family packages are pinned to specific releases.
    ! pip3 install -U tensorflow==2.5 $USER_FLAG
    ! pip3 install -U tensorflow-data-validation==1.2 $USER_FLAG
    ! pip3 install -U tensorflow-transform==1.2 $USER_FLAG
    ! pip3 install -U tensorflow-io==0.18 $USER_FLAG
    # The remaining packages are upgraded to their latest available release.
    ! pip3 install --upgrade google-cloud-aiplatform[tensorboard] $USER_FLAG
    ! pip3 install --upgrade google-cloud-pipeline-components $USER_FLAG
    ! pip3 install --upgrade google-cloud-bigquery $USER_FLAG
    ! pip3 install --upgrade google-cloud-logging $USER_FLAG
    ! pip3 install --upgrade apache-beam[gcp] $USER_FLAG
    ! pip3 install --upgrade pyarrow $USER_FLAG
    ! pip3 install --upgrade cloudml-hypertune $USER_FLAG
    ! pip3 install --upgrade kfp $USER_FLAG
    ! pip3 install --upgrade torchvision $USER_FLAG
    ! pip3 install --upgrade rpy2 $USER_FLAG
    ! pip3 install --upgrade python-tabulate $USER_FLAG
    ! pip3 install -U opencv-python-headless==4.5.2.52 $USER_FLAG

### Restart the kernel

Once you've installed the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# Restart the kernel so freshly installed packages become importable.
# The restart is skipped whenever IS_TESTING is set to a non-empty value.
if os.getenv("IS_TESTING") in (None, ""):
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

#### Set your project ID

**If you don't know your project ID**, you may be able to get your project ID using `gcloud`.

In [None]:
# Google Cloud project ID; leave the placeholder to have it looked up via gcloud in the next cell.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make the selected project the active one for subsequent gcloud commands.
! gcloud config set project $PROJECT_ID

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for Vertex AI. We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with Vertex AI. Not all regions provide support for all Vertex AI services.

Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region used when creating the bucket, initializing the Vertex AI SDK and building image URIs.
REGION = 'us-central1'  # @param {type: "string"}

#### Timestamp

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a timestamp for each instance session, and append the timestamp onto the name of resources you create in this tutorial.

In [None]:
from datetime import datetime

# Session timestamp (YYYYMMDDHHMMSS) appended to resource names to avoid collisions.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

### Create a Cloud Storage bucket

**The following steps are required, regardless of your notebook environment.**

When you initialize the Vertex SDK for Python, you specify a Cloud Storage staging bucket. The staging bucket is where all the data associated with your dataset and model resources are retained across sessions.

Set the name of your Cloud Storage bucket below. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.

In [None]:
# Cloud Storage staging bucket (gs:// URI); leave the placeholder to have a name generated in the next cell.
BUCKET_NAME = "gs://[your-bucket-name]"  # @param {type:"string"}

In [None]:
# When no bucket was supplied, derive one from the project ID and the session timestamp.
if BUCKET_NAME in ("", None, "gs://[your-bucket-name]"):
    BUCKET_NAME = "".join(["gs://", PROJECT_ID, "aip-", TIMESTAMP])

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket in the selected region.
! gsutil mb -l $REGION $BUCKET_NAME

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
# List the bucket contents in long format (all object versions) to confirm access.
! gsutil ls -al $BUCKET_NAME

#### Service Account

**If you don't know your service account**, try to get your service account using `gcloud` command by executing the second cell below.

In [None]:
# Service account e-mail; leave the placeholder to have it looked up via gcloud in the next cell.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
if SERVICE_ACCOUNT == "" or SERVICE_ACCOUNT is None or SERVICE_ACCOUNT == "[your-service-account]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud auth list 2>/dev/null
    SERVICE_ACCOUNT = shell_output[2].replace('*', '').strip()
    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Pipelines

Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step -- you only need to run these once per service account.

In [None]:
# Allow the service account to create objects in the bucket ...
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_NAME

# ... and to read objects from it.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_NAME

### Set up variables

Next, set up some variables used throughout the tutorial.

### Import libraries and define constants

In [None]:
import google.cloud.aiplatform as aip

#### Import TensorFlow

Import the TensorFlow package into your Python environment.

In [None]:
import tensorflow as tf

In [None]:
import json

from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component

#### Import BigQuery

Import the BigQuery package into your Python environment.

In [None]:
from google.cloud import bigquery

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [None]:
# Set the default project, region and staging bucket for subsequent Vertex AI SDK calls.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_NAME)

### Create BigQuery client

Create the BigQuery client.

In [None]:
# BigQuery client constructed without arguments, so it relies on the
# environment's default project and credentials.
bqclient = bigquery.Client()

#### Set hardware accelerators

You can set hardware accelerators for training and prediction.

Set the variables `TRAIN_GPU/TRAIN_NGPU` and `DEPLOY_GPU/DEPLOY_NGPU` to use a container image supporting a GPU and the number of GPUs allocated to the virtual machine (VM) instance. For example, to use a GPU container image with 4 Nvidia Tesla K80 GPUs allocated to each VM, you would specify:

    (aip.gapic.AcceleratorType.NVIDIA_TESLA_K80, 4)


Otherwise specify `(None, None)` to use a container image to run on a CPU.

Learn more about [hardware accelerator support for your region](https://cloud.google.com/vertex-ai/docs/general/locations#accelerators).

*Note*: TF releases before 2.3 for GPU support will fail to load the custom model in this tutorial. It is a known issue and fixed in TF 2.3. This is caused by static graph ops that are generated in the serving function. If you encounter this issue on your own custom models, use a container image for TF 2.3 with GPU support.

In [None]:
# Training accelerator: always a Tesla K80. The GPU count is read from
# IS_TESTING_TRAIN_GPU when that variable is set; otherwise a single GPU is used.
TRAIN_GPU, TRAIN_NGPU = (
    aip.gapic.AcceleratorType.NVIDIA_TESLA_K80,
    int(os.getenv("IS_TESTING_TRAIN_GPU")) if os.getenv("IS_TESTING_TRAIN_GPU") else 1,
)

# Deployment accelerator: CPU-only (None, None) unless IS_TESTING_DEPLOY_GPU
# supplies a GPU count, in which case a Tesla K80 is used.
if not os.getenv("IS_TESTING_DEPLOY_GPU"):
    DEPLOY_GPU, DEPLOY_NGPU = (None, None)
else:
    DEPLOY_GPU, DEPLOY_NGPU = (
        aip.gapic.AcceleratorType.NVIDIA_TESLA_K80,
        int(os.getenv("IS_TESTING_DEPLOY_GPU")),
    )

#### Set pre-built containers

Set the pre-built Docker container image for training and prediction.


For the latest list, see [Pre-built containers for training](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).


For the latest list, see [Pre-built containers for prediction](https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers).

In [None]:
# TensorFlow version for the pre-built containers, in dashed form ("2-1").
# A version supplied through IS_TESTING_TF is normalized the same way as the
# default, so a dotted value such as "2.1" also yields a valid image name.
TF = (os.getenv("IS_TESTING_TF") or '2-1').replace('.', '-')

# Training images use the "tf-" prefix for every TensorFlow version.
TRAIN_VERSION = '{}.{}'.format('tf-gpu' if TRAIN_GPU else 'tf-cpu', TF)

# Prediction images use the "tf2-" prefix for TensorFlow 2.x and "tf-" otherwise.
DEPLOY_VERSION = '{}-{}.{}'.format(
    'tf2' if TF[0] == '2' else 'tf',
    'gpu' if DEPLOY_GPU else 'cpu',
    TF,
)

# The registry host is selected by the first component of the region (e.g. "us").
TRAIN_IMAGE = "{0}-docker.pkg.dev/vertex-ai/training/{1}:latest".format(REGION.split('-')[0],TRAIN_VERSION)
DEPLOY_IMAGE = "{0}-docker.pkg.dev/vertex-ai/prediction/{1}:latest".format(REGION.split('-')[0],DEPLOY_VERSION)

print("Training:", TRAIN_IMAGE, TRAIN_GPU, TRAIN_NGPU)
print("Deployment:", DEPLOY_IMAGE, DEPLOY_GPU, DEPLOY_NGPU)

#### Set machine type

Next, set the machine type to use for training and prediction.

- Set the variables `TRAIN_COMPUTE` and `DEPLOY_COMPUTE` to configure the compute resources for the VMs you will use for training and prediction.
 - `machine type`
     - `n1-standard`: 3.75GB of memory per vCPU.
     - `n1-highmem`: 6.5GB of memory per vCPU
     - `n1-highcpu`: 0.9 GB of memory per vCPU
 - `vCPUs`: number of \[2, 4, 8, 16, 32, 64, 96 \]

*Note: The following is not supported for training:*

 - `standard`: 2 vCPUs
 - `highcpu`: 2, 4 and 8 vCPUs

*Note: You may also use n2 and e2 machine types for training and deployment, but they do not support GPUs*.

In [None]:
# Training VM: machine family from IS_TESTING_TRAIN_MACHINE when set,
# otherwise n1-standard, combined with the vCPU count.
MACHINE_TYPE = os.getenv("IS_TESTING_TRAIN_MACHINE") or 'n1-standard'
VCPU = '4'
TRAIN_COMPUTE = '-'.join([MACHINE_TYPE, VCPU])
print('Train machine type', TRAIN_COMPUTE)

# Deployment VM: same scheme, driven by IS_TESTING_DEPLOY_MACHINE.
MACHINE_TYPE = os.getenv("IS_TESTING_DEPLOY_MACHINE") or 'n1-standard'
VCPU = '4'
DEPLOY_COMPUTE = '-'.join([MACHINE_TYPE, VCPU])
print('Deploy machine type', DEPLOY_COMPUTE)

## Introduction to Vertex AI Model Evaluation for AutoML models.

For AutoML models, you can retrieve the model evaluation metrics that were obtained during training from the dataset split into train and test, using the `Vertex AI Model Evaluation` service.

Additionally, you can evaluate an AutoML model with custom evaluation slices using the combination of the `ModelBatchPredictOp` and `ModelEvaluationOp` components, as:

    - The custom evaluation slice data contains the label values (ground truths).
    - Perform a batch prediction on the custom evaluation slice.
    - Perform a model evaluation with the batch prediction results and label values.

#### Location of Cloud Storage training data.

Now set the variable `IMPORT_FILE` to the location of the CSV index file in Cloud Storage.

### Create training and custom evaluation slice

First, for demonstration purposes, you simulate having a custom evaluation slice separate from the train/test/eval dataset, by removing a small portion of the examples from the AutoML dataset and creating a new evaluation dataset containing the removed examples.

- `IMPORT_TRAIN`: The dataset for train/test/eval split.
- `IMPORT_EVAL`: The simulated custom evaluation slice.

In [None]:
# Public CSV used as the source for both the training and evaluation slices.
IMPORT_FILE = 'gs://cloud-ml-tables-data/bank-marketing.csv'
# Training slice: the first 40000 lines of the file.
! gsutil cat {IMPORT_FILE} | head -n 40000 > train.csv
# Evaluation slice: the first line (presumably the header row) followed by
# the last 5200 lines of the file.
! gsutil cat {IMPORT_FILE} | head -n 1 >eval.csv
! gsutil cat {IMPORT_FILE} | tail -n 5200 >> eval.csv

# Destination URIs for the two slices inside the staging bucket.
IMPORT_TRAIN = BUCKET_NAME + "/train.csv"
IMPORT_EVAL = BUCKET_NAME + "/eval.csv"

! gsutil cp train.csv {IMPORT_TRAIN}
! gsutil cp eval.csv {IMPORT_EVAL}

# Remove the local copies once they have been uploaded.
! rm -f train.csv eval.csv

### Create AutoML model evaluation component

The Vertex AI pre-built pipeline components do not currently include a component for retrieving the model evaluations for an AutoML model. So, you will first write your own component, as follows:

- Takes as input the region and Model artifacts returned from an AutoML training component.
- Create a client interface to the Vertex AI Model service.
- Construct the resource ID for the model from the model artifact parameter (`metadata["resourceName"]`).
- Retrieve the model evaluation
- Return the model evaluation as a string.

In [None]:
from kfp.v2.dsl import Input, Output, Artifact, Model

@component(packages_to_install=["google-cloud-aiplatform"])
def evaluateAutoMLModelOp(model: Input[Artifact],
                          region: str,
                          model_evaluation: Output[Artifact]):
    """Look up the first model evaluation of a Vertex AI model and record it.

    Args:
        model: Model artifact whose ``metadata["resourceName"]`` holds the
            Vertex AI model resource name.
        region: Region used to build the regional Vertex AI API endpoint.
        model_evaluation: Output artifact; its metadata receives the
            evaluation's resource name and a text rendering of the evaluation.

    Raises:
        RuntimeError: If the model has no evaluations.
    """
    import google.cloud.aiplatform.gapic as gapic
    import logging

    # Get a reference to the Model Service client for the requested region.
    client_options = {"api_endpoint": f"{region}-aiplatform.googleapis.com"}
    model_service_client = gapic.ModelServiceClient(
        client_options=client_options
    )

    model_id = model.metadata["resourceName"]

    evaluations = list(model_service_client.list_model_evaluations(parent=model_id))
    if not evaluations:
        raise RuntimeError(f"No model evaluations found for {model_id}")

    # Use a distinct local name: rebinding `model_evaluation` would discard the
    # output artifact and leave it empty for downstream consumers.
    first_evaluation = evaluations[0]
    logging.info(first_evaluation)

    model_evaluation.metadata["resourceName"] = first_evaluation.name
    model_evaluation.metadata["evaluation"] = str(first_evaluation)

### Construct pipeline for AutoML training, and batch model evaluation

Next, construct the pipeline with the following tasks:

- Create a Vertex AI Dataset resource.
- Train an AutoML tabular classification model.
- Retrieve the AutoML evaluation statistics.
- Make a batch prediction with the AutoML model, using an evaluation slice that was not used during training.
- Evaluate the AutoML model using the results from the batch prediction.

In [None]:
# Cloud Storage folder under which this pipeline's outputs are written.
PIPELINE_ROOT = "{}/pipeline_root/automl_lbn_training".format(BUCKET_NAME)

# Pipeline: create a tabular dataset, train an AutoML classifier, fetch its
# training-time evaluation, batch-predict on the held-out slice, and evaluate
# the batch predictions against the ground-truth column.
#
# Parameters:
#   import_file:  Cloud Storage URI of the training CSV.
#   batch_files:  list of Cloud Storage URIs used as batch prediction input.
#   display_name: display name given to the dataset, training job and model.
#   bucket:       output location for batch prediction and evaluation
#                 (defaults to PIPELINE_ROOT).
#   project:      project ID passed to every Google Cloud component.
#   region:       region passed to the custom evaluation component.
#                 NOTE(review): the pre-built components below are not given a
#                 location and therefore use their own default - confirm this
#                 is intended when REGION is not that default.
@dsl.pipeline(name="automl-lbn-training",
              description="AutoML tabular classification training"
             )
def pipeline(import_file: str,
             batch_files: list,
             display_name: str,
             bucket: str = PIPELINE_ROOT,
             project: str = PROJECT_ID,
             region: str = REGION):
    from google_cloud_pipeline_components import aiplatform as gcc_aip
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
    from google_cloud_pipeline_components.experimental.evaluation import ModelEvaluationOp

    # Step 1: create the tabular dataset from the training CSV.
    dataset_op = gcc_aip.TabularDatasetCreateOp(
        project=project,
        display_name=display_name,
        gcs_source=import_file
    )

    # Step 2: train the classifier with an 80/10/10 split, predicting "Deposit".
    training_op = gcc_aip.AutoMLTabularTrainingJobRunOp(
        project=project,
        display_name=display_name,
        optimization_prediction_type="classification",
        dataset=dataset_op.outputs["dataset"],
        model_display_name=display_name,
        training_fraction_split=0.8,
        validation_fraction_split=0.1,
        test_fraction_split=0.1,
        budget_milli_node_hours=8000,
        optimization_objective="minimize-log-loss",
        target_column="Deposit"
    )

    # Step 3: retrieve the evaluation produced during training.
    eval_op = evaluateAutoMLModelOp(model=training_op.outputs["model"],
                                    region=region)

    # Step 4: batch prediction on the custom slice, ordered after step 3.
    batch_op = ModelBatchPredictOp(
        project=project,
        job_display_name="batch_predict_job",
        model=training_op.outputs["model"],
        gcs_source_uris=batch_files,
        gcs_destination_output_uri_prefix=bucket,
        instances_format="csv",
        predictions_format="jsonl",
        model_parameters={},
        machine_type=DEPLOY_COMPUTE,
        starting_replica_count=1,
        max_replica_count=1,
    ).after(eval_op)


    # Step 5: evaluate the batch predictions against the "Deposit" column.
    # NOTE(review): classification_type is "multiclass" although only two
    # class names are listed - confirm this is the intended setting.
    batch_eval_op = ModelEvaluationOp(
        project=project,
        root_dir=bucket,
        problem_type="classification",
        classification_type="multiclass",
        ground_truth_column="Deposit",
        class_names=["1", "2"],
        predictions_format="jsonl",
        batch_prediction_job=batch_op.outputs["batchpredictionjob"],
    )

### Compile and execute the AutoML training, and batch model evaluation pipeline

Next, you compile the pipeline and then execute it. The pipeline takes the following parameters, which are passed as the dictionary `parameter_values`:

- `import_file`: The Cloud Storage location of the training data.
- `batch_files`: A list of one or more Cloud Storage locations of evaluation data.
- `display_name`: Display name for Vertex AI Model and Endpoint resources.
- `project`: The project ID.
- `region`: The region.

In [None]:
# Compile the pipeline function into a JSON job specification.
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="automl_lbn_training.json"
)

# NOTE: from here on `pipeline` refers to the PipelineJob rather than the
# pipeline function, so this cell cannot be re-run without first re-running
# the cell that defines the function.
pipeline = aip.PipelineJob(
    display_name="automl_lbn_training",
    template_path="automl_lbn_training.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values = { 'import_file': IMPORT_TRAIN,
                         'batch_files': [IMPORT_EVAL],
                         'display_name': "bank" + TIMESTAMP,
                         'project': PROJECT_ID,
                         'region': REGION
                       }

)

# Submit the job.
pipeline.run()

# The compiled specification is no longer needed locally.
! rm -f automl_lbn_training.json

### View the AutoML training and batch evaluation pipeline results

In [None]:
PROJECT_NUMBER = pipeline.gca_resource.name.split('/')[1]
print(PROJECT_NUMBER)

def print_pipeline_output(job, output_task_name):
    JOB_ID = job.name
    print(JOB_ID)
    for _ in range(len(job.gca_resource.job_detail.task_details)):
        TASK_ID = job.gca_resource.job_detail.task_details[_].task_id
        EXECUTE_OUTPUT = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/executor_output.json'
        GCP_RESOURCES = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/gcp_resources'
        EVAL_METRICS = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/evaluation_metrics'
        if tf.io.gfile.exists(EXECUTE_OUTPUT):
            ! gsutil cat $EXECUTE_OUTPUT
            return EXECUTE_OUTPUT
        elif tf.io.gfile.exists(GCP_RESOURCES):
            ! gsutil cat $GCP_RESOURCES
            return GCP_RESOURCES
        elif tf.io.gfile.exists(EVAL_METRICS):
            ! gsutil cat $EVAL_METRICS
            return EVAL_METRICS

    return None

print("tabular-dataset-create")
artifacts = print_pipeline_output(pipeline, 'tabular-dataset-create')
print('\n\n')
print("automl-tabular-training-job")
artifacts = print_pipeline_output(pipeline, 'automl-tabular-training-job')
print('\n\n')
print("evaluateautomlmodelop")
artifacts = print_pipeline_output(pipeline, 'evaluateautomlmodelop')
try:
    output = !gsutil cat $artifacts
    output = json.loads(output[0])
    metrics = output['parameters']['Output']['stringValue']
    print('\n')
    print(metrics)
except:
    pass # cache
print('\n\n')
print("model-batch-predict")
artifacts = print_pipeline_output(pipeline, 'model-batch-predict')
output = !gsutil cat $artifacts
output = json.loads(output[0])
print('\n\n')
print(output['artifacts']['batchpredictionjob']['artifacts'][0]['metadata']['gcsOutputDirectory'])
print("model-evaluation")
artifacts = print_pipeline_output(pipeline, 'model-evaluation')
output = !gsutil cat $artifacts
output = json.loads(output[0])
metrics = output['slicedMetrics'][0]['metrics']
print(metrics)

### Delete a pipeline job

After a pipeline job is completed, you can delete the pipeline job with the method `delete()`.  Prior to completion, a pipeline job can be canceled with the method `cancel()`.

In [None]:
# Delete the pipeline job now that its outputs have been inspected.
pipeline.delete()

## Introduction to Vertex AI Model Evaluation for BigQuery ML models.

For BigQuery ML models, you can retrieve the model evaluation metrics that were obtained during training from the dataset split into train and test, using the `BigQuery ML` service.

Additionally, you can evaluate a BigQuery ML model with custom evaluation slices, by first exporting the model artifacts and importing them as a Vertex AI Model resource, and then using the combination of the `ModelBatchPredictOp` and `ModelEvaluationOp` components, as:

    - The custom evaluation slice data contains the label values (ground truths).
    - Perform a batch prediction on the custom evaluation slice.
    - Perform a model evaluation with the batch prediction results and label values.

### Create training and custom evaluation slice

First, for demonstration purposes, you simulate having a custom evaluation slice separate from the train/test/eval dataset, by removing a small portion of the examples from the BigQuery dataset and creating a new evaluation dataset containing the removed examples.

- `BQ_TABLE_TRAIN`: The dataset for train/test/eval split.
- `BQ_TABLE_EVAL`: The simulated custom evaluation slice.

In [None]:
# Public source table, and the dataset component of its name ("ml_datasets").
BQ_TABLE = 'bigquery-public-data.ml_datasets.penguins'
BQ_DATASET = BQ_TABLE.split('.')[1]


def get_data(slice_name, limit):
    """Materialize a slice of the public penguins table into a new table.

    Rows are ordered by a fingerprint of their content before the limit is
    applied. Without an ORDER BY, BigQuery does not guarantee row order, so
    two calls using LIMIT / OFFSET could return overlapping rows and leak
    evaluation examples into the training slice. The fingerprint gives a
    stable ordering that is not biased by any single column.

    Args:
        slice_name: Fully qualified name of the table to create or replace.
        limit: Text placed after ``LIMIT`` in the query; either a row count
            (e.g. ``44``) or a count with an offset (e.g. ``"300 OFFSET 44"``).
    """
    query = f'''
    CREATE OR REPLACE TABLE `{slice_name}`
    AS (
        WITH
          penguins AS (
          SELECT
            island,
            sex,
            culmen_length_mm,
            culmen_depth_mm,
            flipper_length_mm,
            body_mass_g,
            species
          FROM
            `{BQ_TABLE}`
        )

        SELECT
          island,
          sex,
          culmen_length_mm,
          culmen_depth_mm,
          flipper_length_mm,
          body_mass_g,
          species
        FROM
          penguins AS p
        ORDER BY
          FARM_FINGERPRINT(TO_JSON_STRING(p))
        LIMIT {limit}
    )
    '''

    # Block until the table has been created.
    response = bqclient.query(query)
    response.result()

# The slice tables live in the user's own project, in a dataset named after
# the public one. CREATE TABLE fails when that dataset does not exist, so
# create it first; exists_ok makes this safe to re-run.
bqclient.create_dataset(f"{PROJECT_ID}.{BQ_DATASET}", exists_ok=True)

# Custom evaluation slice: the first 44 rows.
BQ_TABLE_EVAL = f"{PROJECT_ID}.{BQ_DATASET}.penguins_eval"
IMPORT_EVAL = f"bq://{BQ_TABLE_EVAL}"
LIMIT = 44
get_data(BQ_TABLE_EVAL, LIMIT)

# Training slice: the next 300 rows, skipping those used for evaluation.
BQ_TABLE_TRAIN = f"{PROJECT_ID}.{BQ_DATASET}.penguins_train"
IMPORT_TRAIN = f"bq://{BQ_TABLE_TRAIN}"
LIMIT = "300 OFFSET 44"
get_data(BQ_TABLE_TRAIN, LIMIT)

### Construct pipeline for BigQuery ML training, and batch model evaluation

Next, construct the pipeline with the following tasks:

- Create a BigQuery ML Dataset resource.
- Train a BigQuery ML tabular classification model.
- Retrieve the BigQuery ML evaluation statistics.
- Make a batch prediction with the BigQuery ML model, using an evaluation slice that was not used during training.
- Evaluate the BigQuery ML model using the results from the batch prediction.

In [None]:
# Cloud Storage folder under which this pipeline's outputs are written.
PIPELINE_ROOT = f"{BUCKET_NAME}/bq_query"

# Pipeline: create a BigQuery dataset, train a BigQuery ML model, evaluate it
# in BigQuery, export it to Cloud Storage, upload it to Vertex AI, run a batch
# prediction on the custom slice, and evaluate those predictions.
#
# Parameters:
#   bq_train_table:  BigQuery table (project.dataset.table) used for training.
#   bq_eval_table:   bq:// URI of the table used as batch prediction input.
#   label:           name of the label column.
#   class_names:     class names passed to the evaluation component.
#   dataset / model: BigQuery dataset and model names to create.
#   artifact_uri:    Cloud Storage location for the exported model.
#   num_trials:      accepted for interface compatibility; not used below.
#   deploy_image:    serving container image for the uploaded model.
#   machine_type:    VM type for the batch prediction job.
#   min_replica_count / max_replica_count: replica bounds for batch prediction.
#   display_name:    display name of the uploaded Vertex AI model.
#   bucket:          root directory for the evaluation component.
#   model_type:      BigQuery ML model type.
#   accelerator_type / accelerator_count: accelerator for batch prediction.
#   project:         project ID passed to every component.
#   location:        BigQuery location.
#   region:          Vertex AI region for the model upload.
@dsl.pipeline(
    name="bq-hello-world",
    pipeline_root=PIPELINE_ROOT)
def pipeline(bq_train_table: str,
             bq_eval_table: str,
             label: str,
             class_names: list,
             dataset: str,
             model: str,
             artifact_uri: str,
             num_trials: int,
             deploy_image: str,
             machine_type: str,
             min_replica_count: int,
             max_replica_count: int,
             display_name: str,
             bucket: str,
             model_type: str = 'dnn_classifier',
             accelerator_type: str = "",
             accelerator_count: int = 0,
             project: str = PROJECT_ID,
             location: str = "US",
             region: str = "us-central1"):
    from google_cloud_pipeline_components.v1.bigquery import (
        BigqueryQueryJobOp,
        BigqueryCreateModelJobOp,
        BigqueryEvaluateModelJobOp,
        BigqueryExportModelJobOp
    )
    from google_cloud_pipeline_components.v1.model import (
        ModelUploadOp
    )
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
    from google_cloud_pipeline_components.experimental.evaluation import ModelEvaluationOp

    # IF NOT EXISTS keeps the pipeline re-runnable once the dataset exists.
    bq_dataset = BigqueryQueryJobOp(
          project=project,
          location=location,
          query=f"CREATE SCHEMA IF NOT EXISTS {dataset}"
    )

    # Train the model, skipping rows with a missing body mass or sex.
    bq_model = BigqueryCreateModelJobOp(
          project=project,
          location=location,
          query=f"CREATE OR REPLACE MODEL {dataset}.{model} OPTIONS (model_type='{model_type}', labels=['{label}']) AS SELECT * FROM `{bq_train_table}` WHERE body_mass_g IS NOT NULL AND sex IS NOT NULL"
    ).after(bq_dataset)

    # Evaluate inside BigQuery, using the pipeline's project and location
    # parameters rather than notebook globals and literals.
    bq_eval = BigqueryEvaluateModelJobOp(
          project=project,
          location=location,
          model=bq_model.outputs["model"]
    ).after(bq_model)

    # Export the trained model to Cloud Storage.
    bq_export = BigqueryExportModelJobOp(
          project=project,
          location=location,
          model=bq_model.outputs["model"],
          model_destination_path=artifact_uri
    ).after(bq_model)

    # Register the exported artifacts as a Vertex AI Model resource.
    model_upload = ModelUploadOp(
        display_name=display_name,
        artifact_uri=artifact_uri,
        serving_container_image_uri=deploy_image,
        project=project,
        location=region
    ).after(bq_export)

    # Batch prediction on the custom evaluation slice, BigQuery in and out.
    batch_predict = ModelBatchPredictOp(
        project=project,
        job_display_name="batch_predict_job",
        model=model_upload.outputs["model"],
        bigquery_source_input_uri=bq_eval_table,
        bigquery_destination_output_uri=f"bq://{project}.{dataset}",
        instances_format="bigquery",
        predictions_format="bigquery",
        model_parameters={},
        machine_type=machine_type,
        starting_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count
    ).after(model_upload)

    # NOTE(review): predictions_format is "jsonl" here while the batch
    # prediction above writes "bigquery" output - confirm which format the
    # evaluation component expects.
    batch_eval = ModelEvaluationOp(
        project=project,
        root_dir=bucket,
        problem_type="classification",
        classification_type="multiclass",
        ground_truth_column=label,
        class_names=class_names,
        predictions_format="jsonl",
        batch_prediction_job=batch_predict.outputs["batchpredictionjob"],
    )

### Compile and execute the BigQuery ML training, and batch model evaluation pipeline

Next, you compile the pipeline and then execute it. The pipeline takes the following parameters, which are passed as the dictionary `parameter_values`:

- `bq_train_table`: The BigQuery table containing the training data.
- `bq_eval_table`: The BigQuery table containing the evaluation data.
- `label`: The corresponding label for the BigQuery dataset.
- `class_names`: A list of the corresponding class names for the labels.
- `dataset`: The BigQuery dataset component name.
- `model`: The BigQuery model component name.
- `artifact_uri`: The Cloud Storage location to export the BigQuery model artifacts.
- `num_trials`: If greater than one, will perform hyperparameter tuning for the specified number of trials using the Vertex AI Vizier service.
- `deploy_image`: The container image for serving predictions.
- `machine_type`: The VM for serving predictions.
- `min_replica_count`/`max_replica_count`: The number of virtual machines for auto-scaling predictions.
- `accelerator_type`: The type of HW accelerator, if any.
- `accelerator_count`: The number of HW accelerators per machine, if any.
- `display_name`: Display name for Vertex AI Model resource.
- `project`: The project ID.
- `region`: The region.

In [None]:
MODEL_DIR = BUCKET_NAME + '/bqmodel'

compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="bqml.json"
)

pipeline = aip.PipelineJob(
    display_name="bqml",
    template_path="bqml.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values={
        'bq_train_table': BQ_TABLE_TRAIN,
        'bq_eval_table': 'bq://' + BQ_TABLE_EVAL,
        'label': "species",
        'class_names': ["Adelie Penguin (Pygoscelis adeliae)", "Chinstrap penguin (Pygoscelis antarctica)", "Gentoo penguin (Pygoscelis papua)"],
        'dataset': "bqml_tutorial",
        'model': "penguins_model",
        'artifact_uri': MODEL_DIR,
        'num_trials': 1,
        'deploy_image': DEPLOY_IMAGE,
        'display_name': 'penguins',
        'machine_type': DEPLOY_COMPUTE,
        'accelerator_type': DEPLOY_GPU.name,
        'accelerator_count': 1,
        'min_replica_count': 1,
        'max_replica_count': 1,
        'bucket' : BUCKET_NAME,
        'project': PROJECT_ID,
        'location': "US"
    },
    enable_caching=False
)

pipeline.run()

! rm -rf bqml.json

### View the BigQuery ML training and batch evaluation pipeline results

In [None]:
PROJECT_NUMBER = pipeline.gca_resource.name.split('/')[1]
print(PROJECT_NUMBER)

def print_pipeline_output(job, output_task_name):
    JOB_ID = job.name
    print(JOB_ID)
    for _ in range(len(job.gca_resource.job_detail.task_details)):
        TASK_ID = job.gca_resource.job_detail.task_details[_].task_id
        EXECUTE_OUTPUT = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/executor_output.json'
        GCP_RESOURCES = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/gcp_resources'
        EVAL_METRICS = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/evaluation_metrics'
        if tf.io.gfile.exists(EXECUTE_OUTPUT):
            ! gsutil cat $EXECUTE_OUTPUT
            return EXECUTE_OUTPUT
        elif tf.io.gfile.exists(GCP_RESOURCES):
            ! gsutil cat $GCP_RESOURCES
            return GCP_RESOURCES
        elif tf.io.gfile.exists(EVAL_METRICS):
            ! gsutil cat $EVAL_METRICS
            return EVAL_METRICS

    return None

print("bigquery-query-job")
artifacts = print_pipeline_output(pipeline, 'bigquery-query-job')
print('\n\n')
print("bigquery-create-model-job")
artifacts = print_pipeline_output(pipeline, 'bigquery-create-model-job')
print('\n\n')
print("bigquery-evaluate-model-job")
artifacts = print_pipeline_output(pipeline, 'bigquery-evaluate-model-job')
print('\n\n')
print("bigquery-export-model-job")
artifacts = print_pipeline_output(pipeline, 'bigquery-export-model-job')
print('\n\n')
print("model-upload")
artifacts = print_pipeline_output(pipeline, 'model-upload')
print('\n\n')
print("model-batch-predict")
artifacts = print_pipeline_output(pipeline, 'model-batch-predict')
output = !gsutil cat $artifacts
output = json.loads(output[0])
print('\n\n')
print(output['artifacts']['batchpredictionjob']['artifacts'][0]['metadata']['gcsOutputDirectory'])
print("model-evaluation")
artifacts = print_pipeline_output(pipeline, 'model-evaluation')
output = !gsutil cat $artifacts
output = json.loads(output[0])
metrics = output['slicedMetrics'][0]['metrics']
print(metrics)

### Delete a pipeline job

After a pipeline job is completed, you can delete the pipeline job with the method `delete()`.  Prior to completion, a pipeline job can be canceled with the method `cancel()`.

In [None]:
# `pipeline` is the PipelineJob created for the BigQuery ML pipeline run above.
pipeline.delete()

#### Delete the BigQuery model and dataset

Next, delete the BigQuery model and dataset.

In [None]:
# Best-effort removal of the model: a missing model is not an error, and any
# other failure is reported instead of being silently swallowed.
# The return values are intentionally not bound to `job`: both calls return
# None, and the final clean-up cell calls `job.delete()` on any global `job`.
try:
    bqclient.delete_model("bqml_tutorial.penguins_model", not_found_ok=True)
except Exception as e:
    print(e)

# Deleting the dataset with its contents also removes any remaining tables or
# models; `not_found_ok` keeps the cell re-runnable.
bqclient.delete_dataset("bqml_tutorial", delete_contents=True, not_found_ok=True)

## Introduction to Vertex AI Model Evaluation for custom models.

For custom models, you can retrieve the model evaluation metrics that were obtained during training from the dataset split into train and test, by downloading the results that your training script saved. We recommend the following for your Python training package:

- `--evaluate`: Use this command-line argument to optionally perform a post-training evaluation.
- `get_data()`: Split the dataset into train/test/eval.
- `evaluate_model()`: Evaluate the model with the test split.
- `metrics.txt`: Save the evaluation metrics as a text file in the same Cloud Storage location as the saved model artifacts.

Additionally, you can evaluate a custom model with custom evaluation slices using the combination of the `ModelBatchPredictOp` and `ModelEvaluationOp` components, as follows:

    - The custom evaluation slice data contains the label values (ground truths).
    - Perform a batch prediction on the custom evaluation slice.
    - Perform a model evaluation with the batch prediction results and label values.

### Examine the training package

#### Package layout

Before you start the training, you will look at how a Python package is assembled for a custom training job. When unarchived, the package contains the following directory/file layout.

- PKG-INFO
- README.md
- setup.cfg
- setup.py
- trainer
  - \_\_init\_\_.py
  - task.py

The files `setup.cfg` and `setup.py` are the instructions for installing the package into the operating environment of the Docker image.

The file `trainer/task.py` is the Python script for executing the custom training job. *Note*: when referring to it as the Python module to run (for example, `python_module` in the pipeline, or in a worker pool specification), you replace the directory slash with a dot (`trainer.task`) and drop the file suffix (`.py`).

#### Package Assembly

In the following cells, you will assemble the training package.

In [None]:
# Make folder for Python training script
# (any folder left over from a previous run is removed first)
! rm -rf custom
! mkdir custom

# Add package information
! touch custom/README.md

# Contents of setup.cfg; the Python string is written to the file via echo.
setup_cfg = "[egg_info]\n\ntag_build =\n\ntag_date = 0"
! echo "$setup_cfg" > custom/setup.cfg

# Contents of setup.py: declares the pinned tensorflow and tensorflow_datasets
# requirements and discovers the package's modules with find_packages().
setup_py = "import setuptools\n\nsetuptools.setup(\n\n    install_requires=[\n\n        'tensorflow==2.5.0',\n\n        'tensorflow_datasets==1.3.0',\n\n    ],\n\n    packages=setuptools.find_packages())"
! echo "$setup_py" > custom/setup.py

# Contents of the PKG-INFO metadata file.
pkg_info = "Metadata-Version: 1.0\n\nName: Boston Housing \n\nVersion: 0.0.0\n\nSummary: Demostration training script\n\nHome-page: www.google.com\n\nAuthor: Google\n\nAuthor-email: aferlitsch@google.com\n\nLicense: Public\n\nDescription: Demo\n\nPlatform: Vertex"
! echo "$pkg_info" > custom/PKG-INFO

# Make the training subfolder
# (the empty __init__.py makes `trainer` an importable package)
! mkdir custom/trainer
! touch custom/trainer/__init__.py

#### Task.py contents

In the next cell, you write the contents of the training script `task.py`. This tutorial does not go into detail; the script is there for you to browse. In summary:

- Gets the directory where to save the model artifacts from the command line (`--model-dir`), and if not specified, then from the environment variable `AIP_MODEL_DIR`.
- Loads Boston Housing dataset from TF.Keras builtin datasets
- Builds a simple deep neural network model using TF.Keras model API.
- Compiles the model (`compile()`).
- Sets a training distribution strategy according to the argument `args.distribute`.
- Trains the model (`fit()`) with epochs specified by `args.epochs`.
- (optional) Evaluates the model (`evaluate()`) and saves the results as `metrics.txt` in the same location as the model artifacts.
- Saves the trained model (`save(args.model_dir)`) to the specified model directory.
- Saves the maximum value for each feature `f.write(str(params))` to the specified parameters file.

In [None]:
%%writefile custom/trainer/task.py
# Single, Mirror and Multi-Machine Distributed Training for Boston Housing

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.python.client import device_lib
import numpy as np
import argparse
import os
import sys
import logging
tfds.disable_progress_bar()

parser = argparse.ArgumentParser()
parser.add_argument('--model-dir', dest='model_dir',
                    default=os.getenv('AIP_MODEL_DIR'), type=str, help='Model dir.')
parser.add_argument('--lr', dest='lr',
                    default=0.001, type=float,
                    help='Learning rate.')
parser.add_argument('--epochs', dest='epochs',
                    default=20, type=int,
                    help='Number of epochs.')
parser.add_argument('--steps', dest='steps',
                    default=100, type=int,
                    help='Number of steps per epoch.')
parser.add_argument('--batch-size', dest='batch_size',
                    default=16, type=int,
                    help='Size of mini-batches.')
parser.add_argument('--distribute', dest='distribute', type=str, default='single',
                    help='distributed training strategy')
parser.add_argument('--param-file', dest='param_file',
                    default='/tmp/param.txt', type=str,
                    help='Output file for parameters')
parser.add_argument('--evaluate', dest='evaluate',
                    default=False, type=bool,
                    help='Whether to perform model evaluaton')
args = parser.parse_args()

logging.info('Python Version = {}'.format(sys.version))
logging.info('TensorFlow Version = {}'.format(tf.__version__))
logging.info('TF_CONFIG = {}'.format(os.environ.get('TF_CONFIG', 'Not found')))

# Select the distribution strategy requested with --distribute.
# Single Machine, single compute device
if args.distribute == 'single':
    # tf.test.is_gpu_available() is deprecated; list the physical GPUs instead.
    if tf.config.list_physical_devices('GPU'):
        strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    else:
        strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
# Single Machine, multiple compute device
elif args.distribute == 'mirror':
    strategy = tf.distribute.MirroredStrategy()
# Multiple Machine, multiple compute device
elif args.distribute == 'multi':
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
else:
    # Fail with a clear message instead of a NameError on `strategy` below.
    raise ValueError(
        "Unknown --distribute value {!r}; expected 'single', 'mirror' or 'multi'".format(
            args.distribute))

# Multi-worker configuration
logging.info('num_replicas_in_sync = {}'.format(strategy.num_replicas_in_sync))


def _scale_features(features):
  """Divide every feature (column) of a 2-D array by that column's maximum.

  Returns:
    A tuple (scaled, maxima): the scaled float64 array and the list of
    per-column maxima that were used as divisors.
  """
  features = np.asarray(features, dtype=np.float64)
  feature_max = features.max(axis=0)
  return features / feature_max, feature_max.tolist()


def get_data():
  """Load the Boston Housing data and scale each feature by its maximum.

  The per-feature maxima of the training split are written to
  `args.param_file` as the string form of a list.

  Returns:
    ((x_train, y_train), (x_test, y_test)) with scaled feature arrays.
  """
  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data(
    path="boston_housing.npz", test_split=0.2, seed=113
  )

  # Scale along the feature axis (columns). Indexing `x_train[i]` selects an
  # example (row), which scaled only the first 13 examples.
  x_train, params = _scale_features(x_train)
  # As before, the test split is scaled by its own maxima, which are not kept.
  x_test, _ = _scale_features(x_test)

  # store the normalization (max) value for each feature
  with tf.io.gfile.GFile(args.param_file, 'w') as f:
    f.write(str(params))
  return (x_train, y_train), (x_test, y_test)


def get_model():
    """Build and compile the regression network.

    The network takes 13 input features, has two 128-unit ReLU hidden layers
    and a single linear output. It is compiled with MSE as both loss and
    metric, using RMSprop with learning rate `args.lr`.
    """
    network_layers = [
        tf.keras.layers.Dense(128, activation='relu', input_shape=(13,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='linear'),
    ]
    regressor = tf.keras.Sequential(network_layers)

    rmsprop = tf.keras.optimizers.RMSprop(learning_rate=args.lr)
    regressor.compile(loss='mse', optimizer=rmsprop, metrics=["mse"])
    return regressor


def train_model(model, x_train, y_train, batch_size):
    """Fit `model` on the training data for `args.epochs` epochs.

    Args:
        model: The compiled model to train.
        x_train: Training features.
        y_train: Training labels.
        batch_size: The per-replica batch size.

    Returns:
        The trained model (the same object that was passed in).
    """
    # The batch size given to `fit` is split across the replicas, so scale the
    # per-replica size by the number of replicas in sync. The value used to be
    # computed here but never passed on.
    global_batch_size = batch_size * strategy.num_replicas_in_sync

    model.fit(x_train, y_train, epochs=args.epochs, batch_size=global_batch_size)
    return model


def evaluate(model, x_test, y_test, metrics_file):
    """Evaluate `model` on the test data and save the result as text.

    The value returned by `model.evaluate` is converted with `str()` and
    written to `metrics_file`.
    """
    metrics_text = str(model.evaluate(x_test, y_test))
    with tf.io.gfile.GFile(metrics_file, 'w') as sink:
        sink.write(metrics_text)


# Load the scaled train and test splits.
(x_train, y_train), (x_test, y_test) = get_data()

# The model is built and compiled inside the strategy scope.
with strategy.scope():
    model = get_model()

train_model(model, x_train, y_train, args.batch_size)

# Optional evaluation: metrics.txt is written into the model directory.
if args.evaluate:
    metrics_path = os.path.join(args.model_dir, 'metrics.txt')
    evaluate(model, x_test, y_test, metrics_path)

model.save(args.model_dir)

#### Store training script on your Cloud Storage bucket

Next, you package the training folder into a compressed tar ball, and then store it in your Cloud Storage bucket.

In [None]:
# Remove archives left over from a previous run.
! rm -f custom.tar custom.tar.gz
# Archive the `custom` package folder, then compress it to custom.tar.gz.
! tar cvf custom.tar custom
! gzip custom.tar
# Upload the archive to the bucket under the name trainer_boston.tar.gz.
! gsutil cp custom.tar.gz $BUCKET_NAME/trainer_boston.tar.gz

In [None]:
from tensorflow.keras.datasets import boston_housing
import numpy as np

(_, _), (x_test, y_test) = boston_housing.load_data(
    path="boston_housing.npz", test_split=0.2, seed=113)

def scale(feature):
    """Divide an array of values by its maximum and return it as float32."""
    max = np.max(feature)
    feature = (feature / max).astype(np.float32)
    return feature

# Let's save one data item that has not been scaled
x_test_notscaled = x_test[0:1].copy()

# Scale every feature, i.e. every column. Indexing `x_test[i]` selects a data
# item (row), which scaled only the first 13 items.
for feature_idx in range(x_test.shape[1]):
    x_test[:, feature_idx] = scale(x_test[:, feature_idx])
x_test = x_test.astype(np.float32)

print(x_test.shape, x_test.dtype, y_test.shape)
print("scaled", x_test[0])
print("unscaled", x_test_notscaled)

### Create custom evaluation slice

First, for demonstration purposes, you simulate a custom evaluation slice by taking two examples from the test split and treating them as if they were separate from the train/test/eval dataset.

In [None]:
# The first two test examples and their labels form the evaluation slice.
test_item_1, test_item_2 = x_test[0], x_test[1]
test_label_1, test_label_2 = y_test[0], y_test[1]
print(test_item_1.shape)

### Make the batch input file

Now make a batch input file, which you will store in your Cloud Storage bucket. Each instance in the prediction request is a dictionary entry of the form:

                        {serving_input: content}

- `serving_input`: The name of the input layer of the underlying model.
- `content`: The feature values of the test item as a list.

In [None]:
import json
serving_input = "dense_input"

# One JSON object per line, keyed by the serving input name.
gcs_input_uri = BUCKET_NAME + "/" + "test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, 'w') as f:
    for test_item in (test_item_1, test_item_2):
        data = {serving_input: test_item.tolist()}
        f.write(json.dumps(data) + '\n')

### Create custom model evaluation component

Retrieving evaluation statistics from a custom training script is specific to the construction of the training script. This component follows the convention that the training script wrote the evaluation metrics to the file `metrics.txt`, located in the same location as the saved model artifacts.

- Takes as input the location where the model artifacts are saved.
- Reads the file `metrics.txt`, within the folder location for model artifacts, as the model evaluation metrics.
- Returns the model evaluation as a string.

In [None]:
@component(packages_to_install=["tensorflow"])
def evaluateCustomModelOp(metrics_dir: str) -> str:
    """Return the text of the `metrics.txt` file stored under `metrics_dir`."""
    import os
    import tensorflow as tf

    metrics_path = os.path.join(metrics_dir, 'metrics.txt')
    with tf.io.gfile.GFile(metrics_path, 'r') as metrics_file:
        return metrics_file.read()

### Construct pipeline for custom model training, and batch model evaluation

Next, construct the pipeline with the following tasks:

- Train a custom tabular regression TensorFlow model.
- Retrieve the custom model evaluation statistics.
- Upload the model artifacts to a Vertex AI Model resource.
- Make a batch prediction with the custom model, using an evaluation slice that was not used during training.
- Evaluate the custom model using the results from the batch prediction.

In [None]:
# Root location for this pipeline's artifacts, under the tutorial bucket.
PIPELINE_ROOT = "{}/pipeline_root/custom_lrg_training".format(BUCKET_NAME)

@dsl.pipeline(name="lrg-custom-training",
              description="Custom tabular regression model training"
             )
def pipeline(display_name: str,
             batch_files: list,
             python_package: str,
             python_module: str,
             label: str,
             args: str,
             deploy_image: str,
             bucket: str,
             project: str = PROJECT_ID,
             region: str = REGION):
    """Train a custom model, read its metrics, upload it and batch predict.

    Args:
        display_name: Display name for the training job and the uploaded model.
        batch_files: Cloud Storage URIs of the batch prediction input files.
        python_package: Cloud Storage URI of the Python training package.
        python_module: Module name to run from the training package.
        label: Only referenced by the disabled evaluation step below.
        args: Command-line arguments passed to the training module.
        deploy_image: Serving container image for the uploaded model.
        bucket: Cloud Storage location used for the metrics file, the model
            artifacts and the batch prediction output.
        project: The project ID.
        region: Location used for the model upload.
    """
    from google_cloud_pipeline_components import aiplatform as gcc_aip
    from google_cloud_pipeline_components.v1.batch_predict_job import  ModelBatchPredictOp
    from google_cloud_pipeline_components.v1.model import ModelUploadOp
    from google_cloud_pipeline_components.experimental.evaluation import ModelEvaluationOp


    # Run the Python training package; machine and accelerator settings come
    # from notebook-level constants rather than pipeline parameters.
    training_op = gcc_aip.CustomPythonPackageTrainingJobRunOp(
        project=project,
        display_name=display_name,

        # Training
        python_package_gcs_uri=python_package,
        python_module_name=python_module,
        container_uri = TRAIN_IMAGE,
        staging_bucket=PIPELINE_ROOT,
        args=args,
        replica_count=1,
        machine_type=TRAIN_COMPUTE,
        accelerator_type=TRAIN_GPU.name,
        accelerator_count=TRAIN_NGPU,
    )

    # Read metrics.txt from `bucket` once training has finished.
    eval_op = evaluateCustomModelOp(
        metrics_dir=bucket
    ).after(training_op)

    # Upload the artifacts found at `bucket` as a model, also after training.
    model_upload_op = ModelUploadOp(
        display_name=display_name,
        artifact_uri=bucket,
        serving_container_image_uri=deploy_image,
        project=project,
        location=region
    ).after(training_op)

    # Batch predict with the uploaded model; results are written under `bucket`.
    batch_op = ModelBatchPredictOp(
        project=project,
        job_display_name="batch_predict_job",
        model=model_upload_op.outputs["model"],
        gcs_source_uris=batch_files,
        gcs_destination_output_uri_prefix=bucket,
        instances_format="jsonl",
        predictions_format="jsonl",
        model_parameters={},
        machine_type=DEPLOY_COMPUTE,
        starting_replica_count=1,
        max_replica_count=1,
    )
# NOTE(review): the evaluation step below is disabled. The triple-quoted string
# starts at column 0, so it sits outside the function body and is a plain
# string expression: the pipeline contains no model evaluation task.
'''
    batch_eval_op = ModelEvaluationOp(
        project=project,
        root_dir=bucket,
        problem_type="regression",
        #classification_type="multiclass",
        ground_truth_column=label,
        #class_names=class_names,
        predictions_format="jsonl",
        batch_prediction_job=batch_op.outputs["batchpredictionjob"],
    )
'''

### Compile and execute the custom model training, and batch model evaluation pipeline

Next, you compile the pipeline and then execute it. The pipeline takes the following parameters, which are passed as the dictionary `parameter_values`:

- `batch_files`:  The Cloud Storage location of the custom eval slice.
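- `label`: The name of the label (ground truth) column for the `ModelEvaluationOp` step. That step is currently disabled in the pipeline definition, so this value is not used.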
- `python_package`: The Cloud Storage location of the Python training package.
- `python_module`: The entry module of the Python training package.
- `args`: The command line arguments to pass to the entry module.
- `deploy_image`: The container image for serving predictions.
- `display_name`: Display name for Vertex AI Model resource.
- `bucket`: The Cloud Storage location to store model artifacts and metrics files.
- `project`: The project ID.
- `region`: The region.

In [None]:
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="custom_lrg_training.json"
)

pipeline = aip.PipelineJob(
    display_name="boston-custom_training",
    template_path="custom_lrg_training.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values = { 'batch_files': [gcs_input_uri],
                         'display_name': "boston" + TIMESTAMP,
                         'python_package': f"{BUCKET_NAME}/trainer_boston.tar.gz",
                         'python_module': "trainer.task",
                         'label': "??",
                         'args': ["--evaluate", "True", "--model-dir", BUCKET_NAME],
                         'deploy_image': DEPLOY_IMAGE,
                         'bucket': BUCKET_NAME,
                         'project': PROJECT_ID,
                         'region': REGION
                       }

)

pipeline.run()

! rm -f custom_lrg_training.json

### View the custom model training and batch evaluation pipeline results

In [None]:
PROJECT_NUMBER = pipeline.gca_resource.name.split('/')[1]
print(PROJECT_NUMBER)

def print_pipeline_output(job, output_task_name):
    JOB_ID = job.name
    print(JOB_ID)
    for _ in range(len(job.gca_resource.job_detail.task_details)):
        TASK_ID = job.gca_resource.job_detail.task_details[_].task_id
        EXECUTE_OUTPUT = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/executor_output.json'
        GCP_RESOURCES = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/gcp_resources'
        EVAL_METRICS = PIPELINE_ROOT + '/' + PROJECT_NUMBER + '/' + JOB_ID + '/' + output_task_name + '_' + str(TASK_ID) + '/evaluation_metrics'
        if tf.io.gfile.exists(EXECUTE_OUTPUT):
            ! gsutil cat $EXECUTE_OUTPUT
            return EXECUTE_OUTPUT
        elif tf.io.gfile.exists(GCP_RESOURCES):
            ! gsutil cat $GCP_RESOURCES
            return GCP_RESOURCES
        elif tf.io.gfile.exists(EVAL_METRICS):
            ! gsutil cat $EVAL_METRICS
            return EVAL_METRICS

    return None

print("custompythonpackagetrainingjob-run")
artifacts = print_pipeline_output(pipeline, 'custompythonpackagetrainingjob-run')
print('\n\n')
print("evaluatecustommodelop")
artifacts = print_pipeline_output(pipeline, 'evaluatecustommodelop')
print('\n\n')
print("model-upload")
artifacts = print_pipeline_output(pipeline, 'model-upload')
print('\n\n')
print("model-batch-predict")
artifacts = print_pipeline_output(pipeline, 'model-batch-predict')
output = !gsutil cat $artifacts
output = json.loads(output[0])
print('\n\n')
print(output['artifacts']['batchpredictionjob']['artifacts'][0]['metadata']['gcsOutputDirectory'])
print("model-evaluation")
artifacts = print_pipeline_output(pipeline, 'model-evaluation')
output = !gsutil cat $artifacts
output = json.loads(output[0])
metrics = output['slicedMetrics'][0]['metrics']
print(metrics)

### Delete a pipeline job

After a pipeline job is completed, you can delete the pipeline job with the method `delete()`.  Prior to completion, a pipeline job can be canceled with the method `cancel()`.

In [None]:
# `pipeline` is the PipelineJob created for the custom training pipeline run above.
pipeline.delete()

# Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial:

- Dataset
- Pipeline
- Model
- Endpoint
- AutoML Training Job
- Batch Job
- Custom Job
- Hyperparameter Tuning Job
- Cloud Storage Bucket

In [None]:
delete_all = True

if delete_all:
    # Delete the dataset using the Vertex dataset object
    try:
        if 'dataset' in globals():
            dataset.delete()
    except Exception as e:
        print(e)

    # Delete the model using the Vertex model object
    try:
        if 'model' in globals():
            model.delete()
    except Exception as e:
        print(e)

    # Delete the endpoint using the Vertex endpoint object
    try:
        if 'endpoint' in globals():
            endpoint.undeploy_all()
            endpoint.delete()
    except Exception as e:
        print(e)

    # Delete the AutoML or Pipeline training job
    try:
        if 'dag' in globals():
            dag.delete()
    except Exception as e:
        print(e)

    # Delete the custom training job
    try:
        if 'job' in globals():
            job.delete()
    except Exception as e:
        print(e)

    # Delete the batch prediction job using the Vertex batch prediction object
    try:
        if 'batch_predict_job' in globals():
            batch_predict_job.delete()
    except Exception as e:
        print(e)

    # Delete the hyperparameter tuning job using the Vertex hyperparameter tuning object
    try:
        if 'hpt_job' in globals():
            hpt_job.delete()
    except Exception as e:
        print(e)

    if 'BUCKET_NAME' in globals():
        ! gsutil rm -r $BUCKET_NAME