In [None]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# AI Platform (Unified) client library: Custom training text binary classification model for online prediction using exported dataset

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/ai-platform-samples/blob/master/ai-platform-unified/showcase_custom_text_binary_classification_online_exported_ds.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/ai-platform-samples/blob/master/ai-platform-unified/showcase_custom_text_binary_classification_online_exported_ds.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>
<br/><br/><br/>

## Overview

This tutorial demonstrates how to use the AI Platform (Unified) Python client library to train and deploy a custom text binary classification model for online prediction, using an exported `Dataset` resource.


### Objective

In this tutorial, you learn how to create a custom model using an exported `Dataset` resource from a Python script in a Docker container using the AI Platform (Unified) client library, and then do a prediction on the deployed model. You can alternatively create custom models from the command line using `gcloud` or online using Google Cloud Console.

The steps performed include:

- Create an AI Platform (Unified) `Dataset` resource.
- Export the `Dataset` resource's manifest.
- Create an AI Platform (Unified) custom job for training a model.
- Import the exported dataset manifest.
- Train the model.
- Retrieve and load the model artifacts.
- View the model evaluation.
- Upload the model as an AI Platform (Unified) `Model` resource.
- Deploy the `Model` resource to a serving `Endpoint` resource.
- Make a prediction.
- Undeploy the `Model` resource.

### Costs

This tutorial uses billable components of Google Cloud (GCP):

* AI Platform (Unified)
* Cloud Storage

Learn about [AI Platform (Unified)
pricing](https://cloud.google.com/ai-platform-unified/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Installation

Install the latest version of AI Platform (Unified) client library.

In [None]:
import sys
if 'google.colab' in sys.modules:
    USER_FLAG = ''
else:
    USER_FLAG = '--user'

! pip3 install -U google-cloud-aiplatform $USER_FLAG

Install the latest GA version of *google-cloud-storage* library as well.

In [None]:
! pip3 install -U google-cloud-storage $USER_FLAG

### Restart the kernel

Once you've installed the AI Platform (Unified) client library and the *google-cloud-storage* library, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# The restart is skipped whenever IS_TESTING is set in the environment
if not os.getenv("IS_TESTING"):
    import IPython

    # Shut the kernel down with restart=True so the new packages are importable
    IPython.Application.instance().kernel.do_shutdown(True)

## Before you begin

### GPU runtime

*Make sure you're running this notebook in a GPU runtime if you have that option. In Colab, select* **Runtime > Change Runtime Type > GPU**

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [Enable the AI Platform (Unified) APIs and Compute Engine APIs.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [The Google Cloud SDK](https://cloud.google.com/sdk) is already installed in AI Platform (Unified) Notebooks.

5. Enter your project ID in the cell below. Then run the  cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

In [None]:
PROJECT_ID = "[your-project-id]" #@param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
! gcloud config set project $PROJECT_ID

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for AI Platform (Unified). We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with AI Platform (Unified). Not all regions provide support for all AI Platform (Unified) services. For the latest support per region, see the [AI Platform (Unified) locations documentation](https://cloud.google.com/ai-platform-unified/docs/general/locations)

In [None]:
REGION = 'us-central1' #@param {type: "string"}

#### Timestamp

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a timestamp for each instance session, and append onto the name of resources which will be created in this tutorial.

In [None]:
from datetime import datetime

TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

### Authenticate your Google Cloud account

**If you are using AI Platform (Unified) notebooks**, your environment is already
authenticated. Skip this step.

*Note: If you are on an AI Platform (Unified) notebook and run the cell, the cell knows to skip executing the authentication steps.*

In [None]:
import os
import sys

# Authenticate this notebook session with Google Cloud.
#
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on AI Platform (Unified) notebooks, then don't execute this code.
# The existence of the env_version file below is what this cell uses to detect
# that environment.
if not os.path.exists('/opt/deeplearning/metadata/env_version'):
    # Colab: use Colab's own authentication helper
    if 'google.colab' in sys.modules:
        from google.colab import auth as google_auth
        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud.
    # This line sits outside the if/else above, so it runs for both the Colab
    # and the local branch.
    ! gcloud auth login

### Create a Cloud Storage bucket

**The following steps are required, regardless of your notebook environment.**

When you submit a custom training job using the AI Platform (Unified) client library, you upload a Python package
containing your training code to a Cloud Storage bucket. AI Platform (Unified) runs
the code from this package. In this tutorial, AI Platform (Unified) also saves the
trained model that results from your job in the same bucket. You can then
create an `Endpoint` resource based on this output in order to serve
online predictions.

Set the name of your Cloud Storage bucket below. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.

In [None]:
BUCKET_NAME = "gs://[your-bucket-name]" #@param {type:"string"}

In [None]:
# No bucket name supplied (empty, None or the placeholder): build one from the
# project ID and the session timestamp.
if BUCKET_NAME in ("", None, "gs://[your-bucket-name]"):
    BUCKET_NAME = "gs://{}aip-{}".format(PROJECT_ID, TIMESTAMP)

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
! gsutil mb -l $REGION $BUCKET_NAME

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
! gsutil ls -al $BUCKET_NAME

### Set up variables

Next, set up some variables used throughout the tutorial.
### Import libraries and define constants

#### Import AI Platform (Unified) client library

Import the AI Platform (Unified) client library into our Python environment.

In [None]:
import os
import sys
import time

import google.cloud.aiplatform_v1 as aip

from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
from google.protobuf.struct_pb2 import Struct
from google.protobuf.json_format import MessageToJson
from google.protobuf.json_format import ParseDict

#### AI Platform (Unified) constants

Set up the following constants for AI Platform (Unified):

- `API_ENDPOINT`: The AI Platform (Unified) API service endpoint for dataset, model, job, pipeline and endpoint services.
- `PARENT`: The AI Platform (Unified) location root path for dataset, model, job, pipeline and endpoint resources.

In [None]:
# Regional API service endpoint shared by all service clients
API_ENDPOINT = REGION + "-aiplatform.googleapis.com"

# AI Platform (Unified) location root path for your dataset, model and endpoint resources
PARENT = "projects/{}/locations/{}".format(PROJECT_ID, REGION)

#### Hardware Accelerators

Set the hardware accelerators (e.g., GPU), if any, for training and prediction.

Set the variables `TRAIN_GPU/TRAIN_NGPU` and `DEPLOY_GPU/DEPLOY_NGPU` to use a container image supporting a GPU and the number of GPUs allocated to the virtual machine (VM) instance. For example, to use a GPU container image with 4 Nvidia Tesla K80 GPUs allocated to each VM, you would specify:

    (aip.AcceleratorType.NVIDIA_TESLA_K80, 4)

For GPU, available accelerators include:
   - aip.AcceleratorType.NVIDIA_TESLA_K80
   - aip.AcceleratorType.NVIDIA_TESLA_P100
   - aip.AcceleratorType.NVIDIA_TESLA_P4
   - aip.AcceleratorType.NVIDIA_TESLA_T4
   - aip.AcceleratorType.NVIDIA_TESLA_V100


Otherwise specify `(None, None)` to use a container image to run on a CPU.

*Note*: TF releases before 2.3 for GPU support will fail to load the custom model in this tutorial. It is a known issue and fixed in TF 2.3 -- which is caused by static graph ops that are generated in the serving function. If you encounter this issue on your own custom models, use a container image for TF 2.3 with GPU support.

In [None]:
# Training always uses a K80; the GPU count defaults to 1 and can be
# overridden through the IS_TESTING_TRAIN_GPU environment variable.
TRAIN_GPU = aip.AcceleratorType.NVIDIA_TESLA_K80
TRAIN_NGPU = int(os.getenv("IS_TESTING_TRAIN_GPU") or 1)

# Deployment runs without an accelerator unless IS_TESTING_DEPOLY_GPU is set.
# (The variable name is spelled "DEPOLY" in this notebook; it is read as-is.)
if not os.getenv("IS_TESTING_DEPOLY_GPU"):
    DEPLOY_GPU = None
    DEPLOY_NGPU = None
else:
    DEPLOY_GPU = aip.AcceleratorType.NVIDIA_TESLA_K80
    DEPLOY_NGPU = int(os.getenv("IS_TESTING_DEPOLY_GPU"))

#### Container (Docker) image

Next, we will set the Docker container images for training and prediction

 - TensorFlow 1.15
   - `gcr.io/cloud-aiplatform/training/tf-cpu.1-15:latest`
   - `gcr.io/cloud-aiplatform/training/tf-gpu.1-15:latest`
 - TensorFlow 2.1
   - `gcr.io/cloud-aiplatform/training/tf-cpu.2-1:latest`
   - `gcr.io/cloud-aiplatform/training/tf-gpu.2-1:latest`
 - TensorFlow 2.2
   - `gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest`
   - `gcr.io/cloud-aiplatform/training/tf-gpu.2-2:latest`
 - TensorFlow 2.3
   - `gcr.io/cloud-aiplatform/training/tf-cpu.2-3:latest`
   - `gcr.io/cloud-aiplatform/training/tf-gpu.2-3:latest`
 - TensorFlow 2.4
   - `gcr.io/cloud-aiplatform/training/tf-cpu.2-4:latest`
   - `gcr.io/cloud-aiplatform/training/tf-gpu.2-4:latest`
 - XGBoost
   - `gcr.io/cloud-aiplatform/training/xgboost-cpu.1-1`
 - Scikit-learn
   - `gcr.io/cloud-aiplatform/training/scikit-learn-cpu.0-23:latest`
 - Pytorch
   - `gcr.io/cloud-aiplatform/training/pytorch-cpu.1-4:latest`
   - `gcr.io/cloud-aiplatform/training/pytorch-cpu.1-5:latest`
   - `gcr.io/cloud-aiplatform/training/pytorch-cpu.1-6:latest`
   - `gcr.io/cloud-aiplatform/training/pytorch-cpu.1-7:latest`

For the latest list, see [Pre-built containers for training](https://cloud.google.com/ai-platform-unified/docs/training/pre-built-containers).

 - TensorFlow 1.15
   - `gcr.io/cloud-aiplatform/prediction/tf-cpu.1-15:latest`
   - `gcr.io/cloud-aiplatform/prediction/tf-gpu.1-15:latest`
 - TensorFlow 2.1
   - `gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-1:latest`
   - `gcr.io/cloud-aiplatform/prediction/tf2-gpu.2-1:latest`
 - TensorFlow 2.2
   - `gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest`
   - `gcr.io/cloud-aiplatform/prediction/tf2-gpu.2-2:latest`
 - TensorFlow 2.3
   - `gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-3:latest`
   - `gcr.io/cloud-aiplatform/prediction/tf2-gpu.2-3:latest`
 - XGBoost
   - `gcr.io/cloud-aiplatform/prediction/xgboost-cpu.1-2:latest`
   - `gcr.io/cloud-aiplatform/prediction/xgboost-cpu.1-1:latest`
   - `gcr.io/cloud-aiplatform/prediction/xgboost-cpu.0-90:latest`
   - `gcr.io/cloud-aiplatform/prediction/xgboost-cpu.0-82:latest`
 - Scikit-learn
   - `gcr.io/cloud-aiplatform/prediction/sklearn-cpu.0-23:latest`
   - `gcr.io/cloud-aiplatform/prediction/sklearn-cpu.0-22:latest`
   - `gcr.io/cloud-aiplatform/prediction/sklearn-cpu.0-20:latest`

For the latest list, see [Pre-built containers for prediction](https://cloud.google.com/ai-platform-unified/docs/predictions/pre-built-containers)

In [None]:
# TensorFlow release used for the images; IS_TESTING_TF overrides the default
TF = os.getenv("IS_TESTING_TF") or '2-1'

# Training images use the 'tf-gpu' / 'tf-cpu' prefix for every TF release
TRAIN_VERSION = '{}.{}'.format('tf-gpu' if TRAIN_GPU else 'tf-cpu', TF)

# Prediction images use a 'tf2-' prefix for TF 2.x releases and 'tf-' otherwise
if TF[0] == '2':
    DEPLOY_VERSION = '{}.{}'.format('tf2-gpu' if DEPLOY_GPU else 'tf2-cpu', TF)
else:
    DEPLOY_VERSION = '{}.{}'.format('tf-gpu' if DEPLOY_GPU else 'tf-cpu', TF)

TRAIN_IMAGE = "gcr.io/cloud-aiplatform/training/" + TRAIN_VERSION + ":latest"
DEPLOY_IMAGE = "gcr.io/cloud-aiplatform/prediction/" + DEPLOY_VERSION + ":latest"

print("Training:", TRAIN_IMAGE, TRAIN_GPU, TRAIN_NGPU)
print("Deployment:", DEPLOY_IMAGE, DEPLOY_GPU, DEPLOY_NGPU)

#### Machine Type

Next, set the machine type to use for training and prediction.

- Set the variables `TRAIN_COMPUTE` and `DEPLOY_COMPUTE` to configure the compute resources for the VMs you will use for training and prediction.
 - `machine type`
     - `n1-standard`: 3.75GB of memory per vCPU.
     - `n1-highmem`: 6.5GB of memory per vCPU
     - `n1-highcpu`: 0.9 GB of memory per vCPU
 - `vCPUs`: number of \[2, 4, 8, 16, 32, 64, 96 \]

*Note: The following is not supported for training:*

 - `standard`: 2 vCPUs
 - `highcpu`: 2, 4 and 8 vCPUs

*Note: You may also use n2 and e2 machine types for training and deployment, but they do not support GPUs*.

In [None]:
# Training VM: machine family from IS_TESTING_TRAIN_MACHINE (default n1-standard)
MACHINE_TYPE = os.getenv("IS_TESTING_TRAIN_MACHINE") or 'n1-standard'
VCPU = '4'
TRAIN_COMPUTE = '-'.join([MACHINE_TYPE, VCPU])
print('Train machine type', TRAIN_COMPUTE)

# Serving VM: machine family from IS_TESTING_DEPLOY_MACHINE (default n1-standard).
# MACHINE_TYPE and VCPU are reassigned here, so they end up describing the
# deployment machine.
MACHINE_TYPE = os.getenv("IS_TESTING_DEPLOY_MACHINE") or 'n1-standard'
VCPU = '4'
DEPLOY_COMPUTE = '-'.join([MACHINE_TYPE, VCPU])
print('Deploy machine type', DEPLOY_COMPUTE)

# Tutorial

Now you are ready to start creating and training your own custom text binary classification model.

## Set up clients

The AI Platform (Unified) client library works as a client/server model. On your side (the Python script) you will create a client that sends requests and receives responses from the AI Platform (Unified) server.

You will use different clients in this tutorial for different steps in the workflow. So set them all up upfront.

- Dataset Service for `Dataset` resources.
- Model Service for `Model` resources.
- Endpoint Service for deployment.
- Job Service for batch jobs and custom training.
- Prediction Service for serving.

In [None]:
# Every service client is pointed at the same regional API endpoint
client_options = {"api_endpoint": API_ENDPOINT}


def create_job_client():
    """Return a Job Service client configured with `client_options`."""
    return aip.JobServiceClient(client_options=client_options)


def create_dataset_client():
    """Return a Dataset Service client configured with `client_options`."""
    return aip.DatasetServiceClient(client_options=client_options)


def create_model_client():
    """Return a Model Service client configured with `client_options`."""
    return aip.ModelServiceClient(client_options=client_options)


def create_endpoint_client():
    """Return an Endpoint Service client configured with `client_options`."""
    return aip.EndpointServiceClient(client_options=client_options)


def create_prediction_client():
    """Return a Prediction Service client configured with `client_options`."""
    return aip.PredictionServiceClient(client_options=client_options)


# One client per service, keyed by a short service name
clients = {
    'job': create_job_client(),
    'dataset': create_dataset_client(),
    'model': create_model_client(),
    'endpoint': create_endpoint_client(),
    'prediction': create_prediction_client(),
}

# Show each (name, client) pair that was created
for client in clients.items():
    print(client)

## Dataset

Now that your clients are ready, your first step in training a model is to create a managed dataset instance, and then upload your labeled data to it.

### Create `Dataset` resource instance

Use the helper function `create_dataset` to create the instance of a `Dataset` resource. This function does the following:

1. Uses the dataset client service.
2. Creates an AI Platform (Unified) `Dataset` resource (`aip.Dataset`), with the following parameters:
 - `display_name`: The human-readable name you choose to give it.
 - `metadata_schema_uri`: The schema for the dataset type.
3. Calls the client dataset service method `create_dataset`, with the following parameters:
 - `parent`: The AI Platform (Unified) location root path for your `Dataset`, `Model` and `Endpoint` resources.
 - `dataset`: The AI Platform (Unified) dataset object instance you created.
4. The method returns an `operation` object.

An `operation` object is how AI Platform (Unified) handles asynchronous calls for long running operations. While this step usually goes fast, when you first use it in your project, there is a longer delay due to provisioning.

You can use the `operation` object to get status on the operation (e.g., create `Dataset` resource) or to cancel the operation, by invoking an operation method:

| Method      | Description |
| ----------- | ----------- |
| result()    | Waits for the operation to complete and returns a result object in JSON format.      |
| running()   | Returns True/False on whether the operation is still running.        |
| done()      | Returns True/False on whether the operation is completed. |
| cancelled() | Returns True/False on whether the operation was cancelled. |
| cancel()    | Cancels the operation (this may take up to 30 seconds). |

In [None]:
# Default number of seconds to wait for the dataset creation operation
TIMEOUT = 90

def create_dataset(name, schema, labels=None, timeout=TIMEOUT):
    """Create a `Dataset` resource and wait for the operation to finish.

    Args:
        name: Display name given to the dataset.
        schema: Schema URI passed as the dataset's `metadata_schema_uri`.
        labels: Optional labels passed through to `aip.Dataset`.
        timeout: Seconds to wait for the long running operation's result.

    Returns:
        The dataset returned by the operation, or None if any exception was
        raised (the exception is printed, not re-raised).
    """
    start_time = time.time()
    try:
        dataset = aip.Dataset(display_name=name, metadata_schema_uri=schema, labels=labels)

        operation = clients['dataset'].create_dataset(parent=PARENT, dataset=dataset)
        print("Long running operation:", operation.operation.name)
        # Honor the caller's timeout rather than always using the module default
        result = operation.result(timeout=timeout)
        print("time:", time.time() - start_time)
        print("response")
        print(" name:", result.name)
        print(" display_name:", result.display_name)
        print(" metadata_schema_uri:", result.metadata_schema_uri)
        print(" metadata:", dict(result.metadata))
        print(" create_time:", result.create_time)
        print(" update_time:", result.update_time)
        print(" etag:", result.etag)
        print(" labels:", dict(result.labels))
        return result
    except Exception as e:
        # Best effort: report the failure and let the caller check for None
        print("exception:", e)
        return None


# NOTE(review): DATA_SCHEMA is not assigned in any cell visible above this call,
# and the display name is only "-" plus the timestamp -- confirm both are
# intended before running.
result = create_dataset("-" + TIMESTAMP, DATA_SCHEMA)

Now save the unique dataset identifier for the `Dataset` resource instance you created.

In [None]:
# Fully qualified resource name of the dataset that was just created
dataset_id = result.name
# Last path component of that name: the short numeric ID
dataset_short_id = dataset_id.rsplit('/', 1)[-1]

print(dataset_id)

### Data preparation

The AI Platform (Unified) `Dataset` resource for text has a couple of requirements for your text data.

- Text examples must be stored in a CSV or JSONL file.

#### Location of Cloud Storage training data.

Now set the variable `IMPORT_FILE` to the location of the CSV index file in Cloud Storage.

#### Quick peek at your data

You will use a version of the dataset that is stored in a public Cloud Storage bucket, referenced through a CSV index file.

Start by doing a quick peek at the data. You count the number of examples by counting the number of rows in the CSV index file  (`wc -l`) and then peek at the first few rows.

In [None]:
if 'IMPORT_FILES' in globals():
    FILE = IMPORT_FILES[0]
else:
    FILE = IMPORT_FILE

count = ! gsutil cat $FILE | wc -l
print("Number of Examples", int(count[0]))

print("First 10 rows")
! gsutil cat $FILE | head

### Import data

Now, import the data into your AI Platform (Unified) Dataset resource. Use this helper function `import_data` to import the data. The function does the following:

- Uses the `Dataset` client.
- Calls the client method `import_data`, with the following parameters:
 - `name`: The fully qualified identifier of the `Dataset` resource to import the data into.
 - `import_configs`: The import configuration.

- `import_configs`: A Python list containing a dictionary, with the key/value entries:
 - `gcs_source`: A dictionary whose `uris` entry is a list of Cloud Storage paths to one or more index files.
 - `import_schema_uri`: The schema identifying the labeling type.

The `import_data()` method returns a long running `operation` object. This will take a few minutes to complete. If you are in a live tutorial, this would be a good time to ask questions, or take a personal break.

In [None]:
def import_data(dataset, gcs_sources, schema):
    """Import data into a `Dataset` resource and wait for completion.

    Args:
        dataset: Fully qualified identifier of the dataset to import into.
        gcs_sources: List of Cloud Storage URIs of the index file(s).
        schema: Import schema URI identifying the labeling type.

    Returns:
        The long running operation once it has completed, or None if any
        exception was raised (the exception is printed, not re-raised).
    """
    config = [{
        'gcs_source': {'uris': gcs_sources},
        'import_schema_uri': schema
    }]
    # Use the dataset passed by the caller rather than the notebook-level
    # `dataset_id` global, so the function works for any dataset.
    print("dataset:", dataset)
    start_time = time.time()
    try:
        operation = clients['dataset'].import_data(name=dataset, import_configs=config)
        print("Long running operation:", operation.operation.name)

        # Blocks until the import has finished
        result = operation.result()
        print("result:", result)
        print("time:", int(time.time() - start_time), "secs")
        print("error:", operation.exception())
        print("meta :", operation.metadata)
        print("after: running:", operation.running(), "done:", operation.done(), "cancelled:", operation.cancelled())

        return operation
    except Exception as e:
        # Best effort: report the failure and let the caller check for None
        print("exception:", e)
        return None


# NOTE(review): IMPORT_FILE and LABEL_SCHEMA are not assigned in any cell
# visible above this call -- confirm they are defined before running.
import_data(dataset_id, [IMPORT_FILE], LABEL_SCHEMA)

### Export dataset index

Next, you will export the dataset index to a JSONL file which will then be used by your custom training job to get the data and corresponding labels for training your text classification model. Use this helper function `export_data` to export the dataset index. The function does the following:

- Uses the dataset client.
- Calls the client method `export_data`, with the parameters:
 - `name`: The fully qualified identifier of the dataset to export.
 - `export_config`: The export configuration.
- `export_config`: A Python dictionary with the key/value entry:
 - `gcs_destination`: The GCS bucket to write the JSONL dataset index file to.

The `export_data()` method returns a long running `operation` object. This will take a few minutes to complete. The helper function will return the long running operation and the result of the operation when the export has completed.

In [None]:
EXPORT_FILE = BUCKET_NAME + '/export'

def export_data(dataset_id, gcs_dest):
    """Export a dataset's index to Cloud Storage and wait for completion.

    Args:
        dataset_id: Identifier of the dataset, passed as `name` to the client.
        gcs_dest: Cloud Storage URI prefix the export is written under.

    Returns:
        A tuple `(operation, result)` on success, or `(None, None)` if any
        exception was raised (the exception is printed, not re-raised).
    """
    began = time.time()
    export_config = {"gcs_destination": {"output_uri_prefix": gcs_dest}}
    try:
        lro = clients['dataset'].export_data(name=dataset_id, export_config=export_config)
        print("Long running operation:", lro.operation.name)

        # Blocks until the export has finished
        exported = lro.result()
        print("result:", exported)
        print("time:", int(time.time() - began), "secs")
        print("error:", lro.exception())
        print("meta :", lro.metadata)
        print("after: running:", lro.running(), "done:", lro.done(), "cancelled:", lro.cancelled())
    except Exception as e:
        print("exception:", e)
        return None, None
    return lro, exported


_, result = export_data(dataset_id, EXPORT_FILE)

## Undeploy the `Model` resource

Now undeploy your `Model` resource from the serving `Endpoint` resource. Use this helper function `undeploy_model`, which takes the following parameters:

- `deployed_model_id`: The model deployment identifier returned by the endpoint service when the `Model` resource was deployed.
- `endpoint`: The AI Platform (Unified) fully qualified identifier for the `Endpoint` resource where the `Model` is deployed to.

This function calls the endpoint client service's method `undeploy_model`, with the following parameters:

- `deployed_model_id`: The model deployment identifier returned by the endpoint service when the `Model` resource was deployed.
- `endpoint`: The AI Platform (Unified) fully qualified identifier for the `Endpoint` resource where the `Model` resource is deployed.
- `traffic_split`: How to split traffic among the remaining deployed models on the `Endpoint` resource.

Since this is the only deployed model on the `Endpoint` resource, you simply can leave `traffic_split` empty by setting it to {}.

In [None]:
def undeploy_model(deployed_model_id, endpoint):
    """Undeploy a deployed model from an `Endpoint` resource and print the response.

    Args:
        deployed_model_id: Identifier of the model deployment to remove.
        endpoint: Identifier of the endpoint the model is deployed to.
    """
    endpoint_client = clients['endpoint']
    # An empty traffic split is sent along with the undeploy request
    response = endpoint_client.undeploy_model(
        endpoint=endpoint,
        deployed_model_id=deployed_model_id,
        traffic_split={},
    )
    print(response)


undeploy_model(deployed_model_id, endpoint_id)

# Cleaning up

To clean up all GCP resources used in this project, you can [delete the GCP
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial:

- Dataset
- Pipeline
- Model
- Endpoint
- Batch Job
- Custom Job
- Cloud Storage Bucket

In [None]:
delete_dataset = True
delete_pipeline = True
delete_model = True
delete_endpoint = True
delete_batchjob = True
delete_customjob = True
delete_bucket = True

# Delete the dataset using the AI Platform (Unified) fully qualified identifier for the dataset
try:
    if delete_dataset and 'dataset_id' in globals():
        clients['dataset'].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the training pipeline using the AI Platform (Unified) fully qualified identifier for the pipeline
try:
    if delete_pipeline and 'pipeline_id' in globals():
        clients['pipeline'].delete_training_pipeline(name=pipeline_id)
except Exception as e:
    print(e)

# Delete the model using the AI Platform (Unified) fully qualified identifier for the model
try:
    if delete_model and 'model_to_deploy_id' in globals():
        clients['model'].delete_model(name=model_to_deploy_id)
except Exception as e:
    print(e)

# Delete the endpoint using the AI Platform (Unified) fully qualified identifier for the endpoint
try:
    if delete_endpoint and 'endpoint_id' in globals():
        clients['endpoint'].delete_endpoint(name=endpoint_id)
except Exception as e:
    print(e)

# Delete the batch job using the AI Platform (Unified) fully qualified identifier for the batch job
try:
    if delete_batchjob and 'batch_job_id' in globals():
        clients['job'].delete_batch_prediction_job(name=batch_job_id)
except Exception as e:
    print(e)

# Delete the custom job using the AI Platform (Unified) fully qualified identifier for the custom job
try:
    if delete_customjob and 'job_id' in globals():
        clients['job'].delete_custom_job(name=job_id)
except Exception as e:
    print(e)

if delete_bucket and 'BUCKET_NAME' in globals():
    ! gsutil rm -r $BUCKET_NAME