In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# E2E ML on GCP: MLOps stage 3 : formalization: get started with Dataflow pipeline components

<table align="left">
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage3/get_started_with_dataflow_pipeline_components.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/ai/platform/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage3/get_started_with_dataflow_pipeline_components.ipynb">
      Open in Google Cloud Notebooks
    </a>
  </td>
</table>
<br/><br/><br/>

## Overview


This tutorial demonstrates how to use Vertex AI for E2E MLOps on Google Cloud in production. This tutorial covers stage 3 : formalization: get started with Dataflow pipeline components.

### Dataset

The dataset used for this tutorial is the GSOD dataset from [BigQuery public datasets](https://cloud.google.com/bigquery/public-data). The version of the dataset used in this tutorial uses only the fields `year`, `month`, and `day` to predict the value of the mean daily temperature (`mean_temp`).

### Objective

In this tutorial, you learn how to use prebuilt `Google Cloud Pipeline Components` for `Dataflow`.

This tutorial uses the following Google Cloud ML services:

- `Vertex AI Pipelines`
- `Google Cloud Pipeline Components`
- `Dataflow`

The steps performed include:

- Build an Apache Beam data pipeline.
- Encapsulate the Apache Beam data pipeline with a Dataflow component in a Vertex AI pipeline.
- Execute a Vertex AI pipeline.

## Installations

Install *one time* the packages for executing the MLOps notebooks.

In [None]:
ONCE_ONLY = False
if ONCE_ONLY:
    ! pip3 install -U tensorflow==2.5 $USER_FLAG
    ! pip3 install -U tensorflow-data-validation==1.2 $USER_FLAG
    ! pip3 install -U tensorflow-transform==1.2 $USER_FLAG
    ! pip3 install -U tensorflow-io==0.18 $USER_FLAG
    ! pip3 install --upgrade google-cloud-aiplatform[tensorboard] $USER_FLAG
    ! pip3 install --upgrade google-cloud-pipeline-components $USER_FLAG
    ! pip3 install --upgrade google-cloud-bigquery $USER_FLAG
    ! pip3 install --upgrade google-cloud-logging $USER_FLAG
    ! pip3 install --upgrade apache-beam[gcp] $USER_FLAG
    ! pip3 install --upgrade pyarrow $USER_FLAG
    ! pip3 install --upgrade cloudml-hypertune $USER_FLAG
    ! pip3 install --upgrade kfp $USER_FLAG
    ! pip3 install --upgrade torchvision $USER_FLAG
    ! pip3 install --upgrade rpy2 $USER_FLAG
    ! pip3 install --upgrade python-tabulate $USER_FLAG
    ! pip3 install -U opencv-python-headless==4.5.2.52 $USER_FLAG

### Restart the kernel

Once you've installed the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# Under automated testing (IS_TESTING set to a non-empty value) the kernel is
# left alone; otherwise it is shut down with restart=True so that freshly
# installed packages become importable.
if not os.getenv("IS_TESTING"):
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

#### Set your project ID

**If you don't know your project ID**, you may be able to get your project ID using `gcloud`.

In [None]:
# Google Cloud project ID. Leave the placeholder to have the next cell look it
# up from the local gcloud configuration.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Store PROJECT_ID as the `project` property of the local gcloud configuration.
! gcloud config set project $PROJECT_ID

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for Vertex AI. We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with Vertex AI. Not all regions provide support for all Vertex AI services.

Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region used when creating the bucket and as the default `location` of the
# pipelines defined later in this notebook.
REGION = "us-central1"  # @param {type: "string"}

#### Timestamp

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a timestamp for each instance session, and append the timestamp onto the name of resources you create in this tutorial.

In [None]:
from datetime import datetime

# Session timestamp formatted as YYYYmmddHHMMSS; used to build a unique default
# bucket name.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

### Create a Cloud Storage bucket

**The following steps are required, regardless of your notebook environment.**

When you initialize the Vertex SDK for Python, you specify a Cloud Storage staging bucket. The staging bucket is where all the data associated with your dataset and model resources are retained across sessions.

Set the name of your Cloud Storage bucket below. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.

In [None]:
# Cloud Storage bucket URI (including the gs:// scheme). Leave the placeholder
# to have the next cell derive a name from PROJECT_ID and TIMESTAMP.
BUCKET_NAME = "gs://[your-bucket-name]"  # @param {type:"string"}

In [None]:
# Derive a per-session bucket name when BUCKET_NAME is empty, None, or still
# the placeholder.
if BUCKET_NAME in ("", None, "gs://[your-bucket-name]"):
    BUCKET_NAME = "".join(["gs://", PROJECT_ID, "aip-", TIMESTAMP])

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket in REGION (run only if the bucket does not exist yet).
! gsutil mb -l $REGION $BUCKET_NAME

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
# List the bucket contents to confirm that it exists and is accessible.
! gsutil ls -al $BUCKET_NAME

#### Service Account

**If you don't know your service account**, try to get your service account using `gcloud` command by executing the second cell below.

In [None]:
# Service account e-mail that is granted access to the bucket below. Leave the
# placeholder to have the next cell look it up with gcloud.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your GCP project id from gcloud
    shell_output = !gcloud auth list 2>/dev/null
    SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()
    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Pipelines

Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step -- you only need to run these once per service account.

In [None]:
# Allow the service account to create objects in the bucket.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_NAME

# Allow the service account to read objects from the bucket.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_NAME

### Set up variables

Next, set up some variables used throughout the tutorial.
### Import libraries and define constants

In [None]:
import google.cloud.aiplatform as aip

In [None]:
import json

from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component

In [None]:
from google_cloud_pipeline_components.v1.dataflow import DataflowPythonJobOp
from google_cloud_pipeline_components.v1.wait_gcp_resources import \
    WaitGcpResourcesOp

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [None]:
# Pass REGION explicitly so that Vertex AI resources created through the SDK
# (including the pipeline jobs below) use the same region as the Dataflow jobs,
# instead of silently falling back to the SDK's default location.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_NAME)

### Write the Apache Beam pipeline module

First, you write the Python module for the Dataflow pipeline. Since it is a module, you additionally add the `if __name__ == '__main__':` entry point and use `argparse` to pass command-line arguments to the module.

This module implements the Apache Beam word count example.

In [None]:
%%writefile wc.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A minimalist word-counting workflow that counts words in Shakespeare.

This is the first in a series of successively more detailed 'word count'
examples.

Next, see the wordcount pipeline, then the wordcount_debugging pipeline, for
more detailed examples that introduce additional concepts.

Concepts:

1. Reading data from text files
2. Specifying 'inline' transforms
3. Counting a PCollection
4. Writing data to Cloud Storage as text files

To execute this pipeline locally, first edit the code to specify the output
location. Output location could be a local file path or an output prefix
on GCS. (Only update the output location marked with the first CHANGE comment.)

To execute this pipeline remotely, first edit the code to set your project ID,
runner type, the staging location, the temp location, and the output location.
The specified GCS bucket(s) must already exist. (Update all the places marked
with a CHANGE comment.)

Then, run the pipeline as described in the README. It will be deployed and run
using the Google Cloud Dataflow Service. No args are required to run the
pipeline. You can see the results in your output bucket in the GCS browser.
"""

from __future__ import absolute_import

import argparse
import logging
import re

from past.builtins import unicode

import apache_beam as beam
from apache_beam.io import ReadFromText
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions


def run(argv=None):
  """Parse the command line, then build and run the word-count pipeline.

  Args:
    argv: Optional list of command-line arguments. `--input` and `--output`
      are consumed here; all remaining arguments are forwarded to Apache Beam
      as pipeline options.
  """
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--input',
      dest='input',
      default='gs://dataflow-samples/shakespeare/kinglear.txt',
      help='Input file to process.')
  # CHANGE 1/5: The Google Cloud Storage path is required
  # for outputting the results.
  arg_parser.add_argument(
      '--output',
      dest='output',
      default='gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX',
      help='Output file to write results to.')
  app_args, beam_args = arg_parser.parse_known_args(argv)
  # beam_args.extend([
  #     # CHANGE 2/5: (OPTIONAL) Change this to DataflowRunner to
  #     # run your pipeline on the Google Cloud Dataflow Service.
  #     '--runner=DirectRunner',
  #     # CHANGE 3/5: Your project ID is required in order to run your pipeline on
  #     # the Google Cloud Dataflow Service.
  #     '--project=SET_YOUR_PROJECT_ID_HERE',
  #     # CHANGE 4/5: Your Google Cloud Storage path is required for staging local
  #     # files.
  #     '--staging_location=gs://YOUR_BUCKET_NAME/AND_STAGING_DIRECTORY',
  #     # CHANGE 5/5: Your Google Cloud Storage path is required for temporary
  #     # files.
  #     '--temp_location=gs://YOUR_BUCKET_NAME/AND_TEMP_DIRECTORY',
  #     '--job_name=your-wordcount-job',
  # ])

  # save_main_session is enabled because one or more DoFn's in this workflow
  # rely on global context (e.g., a module imported at module level).
  options = PipelineOptions(beam_args)
  options.view_as(SetupOptions).save_main_session = True

  def to_line(pair):
    """Render one (word, count) pair as 'word: count'."""
    word, count = pair
    return '%s: %s' % (word, count)

  with beam.Pipeline(options=options) as pipeline:
    # Read the text file[pattern] into a PCollection of lines.
    lines = pipeline | ReadFromText(app_args.input)

    # Tokenize each line into words made of letters and apostrophes.
    split_words = beam.FlatMap(
        lambda line: re.findall(r'[A-Za-z\']+', line)
    ).with_output_types(unicode)

    # Count the occurrences of each word.
    word_counts = (
        lines
        | 'Split' >> split_words
        | 'PairWithOne' >> beam.Map(lambda word: (word, 1))
        | 'GroupAndSum' >> beam.CombinePerKey(sum))

    # Turn the counts into a PCollection of strings.
    formatted = word_counts | 'Format' >> beam.Map(to_line)

    # The write is applied for its side effect; its result is not used.
    # pylint: disable=expression-not-assigned
    formatted | WriteToText(app_args.output)


# Script entry point: executed only when the module is run directly.
if __name__ == '__main__':
  # Raise the root logger to INFO before starting the pipeline.
  logging.getLogger().setLevel(logging.INFO)
  run()

### Write the requirements (installs) for the Apache Beam pipeline module

Next, create the `requirements.txt` file to specify Python modules that are required to be installed for executing the Apache Beam pipeline module -- in this case, `apache-beam` is required.

In [None]:
%%writefile requirements.txt
apache-beam
future

### Copy python module and requirements file to Cloud Storage

Next, you copy the Python module and requirements file to your Cloud Storage bucket.

Additionally, you set the Cloud Storage location for the output of the Apache Beam word count pipeline.

In [None]:
# Upload the Beam module and its requirements file to the bucket; the
# resulting gs:// URIs are used as defaults by the pipeline definition below.
GCS_WC_PY = BUCKET_NAME + "/wc.py"
! gsutil cp wc.py $GCS_WC_PY
GCS_REQUIREMENTS_TXT = BUCKET_NAME + "/requirements.txt"
! gsutil cp requirements.txt $GCS_REQUIREMENTS_TXT

# Value passed to the Beam module as --output for the word count results.
GCS_WC_OUT = BUCKET_NAME + "/wc_out.txt"

### Create and execute the pipeline job

In this example, the `DataflowPythonJobOp` component takes the following parameters:

- `project`: The project ID.
- `location`: The region.
- `python_module_path`: The Cloud Storage location of the Apache Beam pipeline.
- `temp_location`: The Cloud Storage temporary file workspace for the Apache Beam pipeline.
- `requirements_file_path`: The required Python modules to install.
- `args`: The arguments to pass to the Apache Beam pipeline.

Learn more about [Google Cloud Pipeline Component for Dataflow](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-0.2.0/google_cloud_pipeline_components.experimental.dataflow.html)

In [None]:
import json

# Cloud Storage folder used as the pipeline root and as the default Dataflow
# temp location for the word count pipeline.
PIPELINE_ROOT = "{}/pipeline_root/dataflow_wc".format(BUCKET_NAME)


@dsl.pipeline(name="dataflow-wc", description="Dataflow word count component pipeline")
def pipeline(
    python_file_path: str = GCS_WC_PY,
    project_id: str = PROJECT_ID,
    location: str = REGION,
    staging_dir: str = PIPELINE_ROOT,
    args: list = ["--output", GCS_WC_OUT, "--runner", "DataflowRunner"],
    requirements_file_path: str = GCS_REQUIREMENTS_TXT,
):
    # Two-step pipeline: launch the Beam module as a Dataflow job, then hand
    # the launcher's `gcp_resources` output to the wait component.

    # Step 1: submit the Python Beam module to Dataflow.
    launch_task = DataflowPythonJobOp(
        project=project_id,
        location=location,
        python_module_path=python_file_path,
        temp_location=staging_dir,
        requirements_file_path=requirements_file_path,
        args=args,
    )

    # Step 2: presumably waits for the launched Dataflow job to finish --
    # confirm against the WaitGcpResourcesOp documentation.
    WaitGcpResourcesOp(gcp_resources=launch_task.outputs["gcp_resources"])


# Compile the pipeline function into a local JSON pipeline definition.
compiler.Compiler().compile(pipeline_func=pipeline, package_path="dataflow_wc.json")

# NOTE: from here on the name `pipeline` refers to the PipelineJob rather than
# the pipeline function compiled above.
pipeline = aip.PipelineJob(
    display_name="dataflow_wc",
    template_path="dataflow_wc.json",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

# Run the pipeline job.
pipeline.run()

# Show the first ten lines of the word count output.
! gsutil cat {GCS_WC_OUT}* | head -n10

# Remove the local files generated for this example.
! rm -f dataflow_wc.json wc.py requirements.txt

### Delete a pipeline job

After a pipeline job is completed, you can delete the pipeline job with the method `delete()`.  Prior to completion, a pipeline job can be canceled with the method `cancel()`.

In [None]:
# Delete the word count PipelineJob created in the previous cell.
pipeline.delete()

### Write the Apache Beam pipeline module

Next, you write the Python module for the Apache Beam pipeline. This module implements a dataset split task that divides the data into training and test data, and writes the split dataset as CSV files to a Cloud Storage bucket. In this example, the Python module receives some arguments for the pipeline from the command line, which are passed by the Dataflow pipeline component.

*Note:* The Dataflow prebuilt component implicitly adds Dataflow-specific command-line arguments, such as `project`, `location`, `runner`, and `temp_location`.

In [None]:
%%writefile split.py

import argparse
import logging
import tensorflow_transform.beam as tft_beam


from past.builtins import unicode

import apache_beam as beam
from apache_beam.io import ReadFromText
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions

def run(argv=None):
  """Main entry point; defines and runs the dataset-split pipeline.

  Queries up to 500 rows from the BigQuery table given by `--bq_table`,
  assigns each row to a train or eval partition with an 8:2 ratio, and writes
  each partition as text files under `<bucket>/exported_data/`.

  Args:
    argv: Optional list of command-line arguments. `--bq_table` and `--bucket`
      are consumed here; all remaining arguments are forwarded to Apache Beam
      as pipeline options.
  """
  # Local import keeps this function self-contained.
  from apache_beam.options.pipeline_options import GoogleCloudOptions

  parser = argparse.ArgumentParser()
  parser.add_argument('--bq_table',
                      dest='bq_table',
                      required=True,
                      help='BigQuery table to read, e.g. project.dataset.table.')
  parser.add_argument('--bucket',
                      dest='bucket',
                      required=True,
                      help='Cloud Storage bucket URI that receives the split data.')
  args, pipeline_args = parser.parse_known_args(argv)
  logging.info("ARGS")
  logging.info(args)
  logging.info("PIPELINE ARGS")
  logging.info(pipeline_args)

  exported_train = args.bucket + '/exported_data/train'
  exported_eval = args.bucket + '/exported_data/eval'

  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(SetupOptions).save_main_session = True

  # Let Beam's own option parser extract the project and temp location. This
  # handles both `--key value` and `--key=value`, does not depend on argument
  # order, and yields None (instead of an unbound name) when an option is
  # absent.
  gcp_options = pipeline_options.view_as(GoogleCloudOptions)
  project = gcp_options.project
  temp_location = gcp_options.temp_location

  with beam.Pipeline(options=pipeline_options) as pipeline:
    with tft_beam.Context(temp_location):

      raw_data_query = "SELECT {0},{1} FROM {2} LIMIT 500".format(
          "CAST(station_number as STRING) AS station_number,year,month,day",
          "mean_temp",
          args.bq_table)

      def parse_bq_record(bq_record):
        """Parses a bq_record to a dictionary of single-element lists."""
        output = {}
        for key in bq_record:
          output[key] = [bq_record[key]]
        return output

      def split_dataset(bq_row, num_partitions, ratio):
        """Returns a partition number for a given bq_row.

        The row is serialized to JSON and the sum of its character codes,
        modulo sum(ratio), selects the partition, so the same row always maps
        to the same partition.
        """
        # Imported here, as in the original, so the function does not rely on
        # a module-level import.
        import json

        assert num_partitions == len(ratio)
        bucket = sum(map(ord, json.dumps(bq_row))) % sum(ratio)
        total = 0
        for i, part in enumerate(ratio):
          total += part
          if bucket < total:
            return i
        return len(ratio) - 1

      # Read raw BigQuery data and split it into train and eval partitions.
      raw_train_data, raw_eval_data = (
          pipeline
          | "Read Raw Data"
          >> beam.io.ReadFromBigQuery(
              query=raw_data_query,
              project=project,
              use_standard_sql=True,
          )
          | "Parse Data" >> beam.Map(parse_bq_record)
          | "Split" >> beam.Partition(split_dataset, 2, ratio=[8, 2])
      )

      # NOTE(review): the elements written below are dictionaries, so each
      # output line is presumably the dictionary's text form rather than a
      # comma-separated record, despite the ".csv" suffix -- confirm whether
      # downstream consumers expect real CSV.

      # Write raw train data to GCS.
      _ = raw_train_data | "Write Raw Train Data" >> beam.io.WriteToText(
          file_path_prefix=exported_train, file_name_suffix=".csv"
      )

      # Write raw eval data to GCS.
      _ = raw_eval_data | "Write Raw Eval Data" >> beam.io.WriteToText(
          file_path_prefix=exported_eval, file_name_suffix=".csv"
      )


# Script entry point: executed only when the module is run directly.
if __name__ == '__main__':
  # Raise the root logger to INFO so the logging.info() calls in run() are emitted.
  logging.getLogger().setLevel(logging.INFO)
  run()

### Write the requirements (installs) for the Apache Beam pipeline module

Next, create the `requirements.txt` file to specify Python modules that are required to be installed for executing the Apache Beam pipeline module -- in this case, `apache-beam` and `tensorflow-transform` are required.

In [None]:
%%writefile requirements.txt
apache-beam
tensorflow-transform==1.2.0
future

### Write the setup.py (installs) for the Dataflow workers

Next, create the `setup.py` file to specify Python modules that are required to be installed for executing the Dataflow workers -- in this case, `tensorflow-transform` is required.

In [None]:
%%writefile setup.py
import setuptools

# Distribution metadata for the package shipped to the Dataflow workers.
PACKAGE_NAME = 'my_package'
PACKAGE_VERSION = '0.0.1'

# Dependencies declared through install_requires.
REQUIRED_PACKAGES = ['tensorflow-transform==1.2.0', 'future']

setup_kwargs = dict(
    name=PACKAGE_NAME,
    version=PACKAGE_VERSION,
    description='Demo for split transformation',
    install_requires=REQUIRED_PACKAGES,
    author="cdpe@google.com",
    packages=setuptools.find_packages(),
)
setuptools.setup(**setup_kwargs)

### Copy python module and requirements file to Cloud Storage

Next, you copy the Python module, requirements and setup file to your Cloud Storage bucket.

Additionally, you set the Cloud Storage location for the output of the Apache Beam dataset split pipeline.

In [None]:
# Upload the Beam module, its requirements file, and the worker setup script
# to the bucket; the resulting gs:// URIs are used by the pipeline definition
# below.
GCS_SPLIT_PY = BUCKET_NAME + "/split.py"
! gsutil cp split.py $GCS_SPLIT_PY
GCS_REQUIREMENTS_TXT = BUCKET_NAME + "/requirements.txt"
! gsutil cp requirements.txt $GCS_REQUIREMENTS_TXT
GCS_SETUP_PY = BUCKET_NAME + "/setup.py"
! gsutil cp setup.py $GCS_SETUP_PY

#### Location of BigQuery training data.

Now set the variable `IMPORT_FILE` to the location of the data table in BigQuery.

In [None]:
# The same public table in two spellings: with the bq:// scheme and as a plain
# table path. Only BQ_TABLE is referenced by the pipeline cell below.
IMPORT_FILE = "bq://bigquery-public-data.samples.gsod"
BQ_TABLE = "bigquery-public-data.samples.gsod"

### Create and execute the pipeline job

In this example, the `DataflowPythonJobOp` component takes the following parameters:

- `project`: The project ID.
- `location`: The region.
- `python_module_path`: The Cloud Storage location of the Apache Beam pipeline.
- `temp_location`: The Cloud Storage temporary file workspace for the Apache Beam pipeline.
- `requirements_file_path`: The required Python modules to install.
- `args`: The arguments to pass to the Apache Beam pipeline.

Learn more about [Google Cloud Pipeline Component for Dataflow](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-0.2.0/google_cloud_pipeline_components.experimental.dataflow.html)

Additionally, you add `--runner=DataflowRunner` to the input args, to tell the component to use Dataflow instead of DirectRunner for the Apache Beam job.

In [None]:
# Cloud Storage folder used as the pipeline root and as the default Dataflow
# temp location for the dataset split pipeline.
PIPELINE_ROOT = "{}/pipeline_root/dataflow_split".format(BUCKET_NAME)


@dsl.pipeline(name="dataflow-split", description="Dataflow split dataset")
def pipeline(
    python_file_path: str = GCS_SPLIT_PY,
    project_id: str = PROJECT_ID,
    location: str = REGION,
    staging_dir: str = PIPELINE_ROOT,
    args: list = [
        "--bucket",
        BUCKET_NAME,
        "--bq_table",
        BQ_TABLE,
        "--runner",
        "DataflowRunner",
        "--setup_file",
        GCS_SETUP_PY,
    ],
    requirements_file_path: str = GCS_REQUIREMENTS_TXT,
):
    # Two-step pipeline: launch the split module as a Dataflow job, then hand
    # the launcher's `gcp_resources` output to the wait component.

    # Kept for reference (disabled override of the component's container image):
    # DataflowPythonJobOp.component_spec.implementation.container.image = "gcr.io/ml-pipeline/google-cloud-pipeline-components:v0.2.0_dataflow_logs_fix"

    # Step 1: submit the Python Beam module to Dataflow.
    launch_task = DataflowPythonJobOp(
        project=project_id,
        location=location,
        python_module_path=python_file_path,
        temp_location=staging_dir,
        requirements_file_path=requirements_file_path,
        args=args,
    )

    # Step 2: presumably waits for the launched Dataflow job to finish --
    # confirm against the WaitGcpResourcesOp documentation.
    WaitGcpResourcesOp(gcp_resources=launch_task.outputs["gcp_resources"])


compiler.Compiler().compile(pipeline_func=pipeline, package_path="dataflow_split.json")

pipeline = aip.PipelineJob(
    display_name="dataflow_split",
    template_path="dataflow_split.json",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

pipeline.run()

! gsutil ls {BUCKET_NAME}/exported_data

! rm -f dataflow_split.json split.py requirements.txt

### Delete a pipeline job

After a pipeline job is completed, you can delete the pipeline job with the method `delete()`.  Prior to completion, a pipeline job can be canceled with the method `cancel()`.

In [None]:
# Delete the dataset split PipelineJob created in the previous cell.
pipeline.delete()

# Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial:

- Dataset
- Pipeline
- Model
- Endpoint
- AutoML Training Job
- Batch Job
- Custom Job
- Hyperparameter Tuning Job
- Cloud Storage Bucket

In [None]:
delete_all = True

if delete_all:
    # Delete the dataset using the Vertex dataset object
    try:
        if "dataset" in globals():
            dataset.delete()
    except Exception as e:
        print(e)

    # Delete the model using the Vertex model object
    try:
        if "model" in globals():
            model.delete()
    except Exception as e:
        print(e)

    # Delete the endpoint using the Vertex endpoint object
    try:
        if "endpoint" in globals():
            endpoint.undeploy_all()
            endpoint.delete()
    except Exception as e:
        print(e)

    # Delete the AutoML or Pipeline training job
    try:
        if "dag" in globals():
            dag.delete()
    except Exception as e:
        print(e)

    # Delete the custom training job
    try:
        if "job" in globals():
            job.delete()
    except Exception as e:
        print(e)

    # Delete the batch prediction job using the Vertex batch prediction object
    try:
        if "batch_predict_job" in globals():
            batch_predict_job.delete()
    except Exception as e:
        print(e)

    # Delete the hyperparameter tuning job using the Vertex hyperparameter tuning object
    try:
        if "hpt_job" in globals():
            hpt_job.delete()
    except Exception as e:
        print(e)

    if "BUCKET_NAME" in globals():
        ! gsutil rm -r $BUCKET_NAME