# Heart Failure AutoML pipelines

## Setup

In [1]:
from google.cloud import aiplatform

In [2]:
REGION = "us-central1"
PROJECT = !(gcloud config get-value project)
PROJECT = PROJECT[0]

In [3]:
# Set `PATH` to include the directory containing KFP CLI
PATH = %env PATH
%env PATH=/home/jupyter/.local/bin:{PATH}

env: PATH=/home/jupyter/.local/bin:/usr/local/cuda/bin:/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games


## Build the pipeline

Write the pipeline to disk:

In [4]:
%%writefile ./pipelines/kfp_heart_failure_automl_pipeline.py
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""Kubeflow Heart Failure Pipeline."""

import os

from google_cloud_pipeline_components.aiplatform import (
    AutoMLTabularTrainingJobRunOp,
    EndpointCreateOp,
    ModelDeployOp,
    TabularDatasetCreateOp,
)
from kfp.v2 import dsl

# Pipeline configuration. Every value is read from the environment when this
# module is imported (i.e. when the pipeline is compiled); the second argument
# of `os.getenv` is the fallback used when the variable is not set.
PIPELINE_ROOT = os.getenv("PIPELINE_ROOT")
PROJECT = os.getenv("PROJECT")
DATASET_SOURCE = os.getenv("DATASET_SOURCE")
PIPELINE_NAME = os.getenv("PIPELINE_NAME", "kfp-heartfailure")
# The model/dataset/endpoint display name falls back to the pipeline name.
DISPLAY_NAME = os.getenv("MODEL_DISPLAY_NAME", PIPELINE_NAME)
TARGET_COLUMN = os.getenv("TARGET_COLUMN", "HeartDisease")
SERVING_MACHINE_TYPE = os.getenv("SERVING_MACHINE_TYPE", "n1-standard-4")
SPLIT_COLUMN = os.getenv("SPLIT_COLUMN", "split")
OPTIMIZATION_OBJECTIVE = os.getenv("OPTIMIZATION_OBJECTIVE", "maximize-au-roc")
# `os.getenv` always returns a string, but the training budget is an integer
# number of milli node hours, so convert it explicitly. A non-numeric value
# raises ValueError here, at compile time, instead of failing in the job.
BUDGET_MILLI_NODE_HOURS = int(os.getenv("BUDGET_MILLI_NODE_HOURS", "2000"))

@dsl.pipeline(
    name=f"{PIPELINE_NAME}-vertex-automl-pipeline",
    description=f"AutoML Vertex Pipeline for {PIPELINE_NAME}",
    pipeline_root=PIPELINE_ROOT,
)
def create_pipeline():
    """Define the AutoML pipeline: dataset -> training -> endpoint -> deploy.

    The pipeline takes no runtime parameters; every setting comes from the
    module-level constants read from the environment at compile time.
    """
    # Step 1: register the source given by DATASET_SOURCE as a tabular dataset.
    dataset_op = TabularDatasetCreateOp(
        project=PROJECT,
        display_name=DISPLAY_NAME,
        gcs_source=DATASET_SOURCE,
    )

    # Step 2: train an AutoML classification model on that dataset, using the
    # predefined split column and the configured objective and budget.
    training_op = AutoMLTabularTrainingJobRunOp(
        project=PROJECT,
        display_name=DISPLAY_NAME,
        dataset=dataset_op.outputs["dataset"],
        optimization_prediction_type="classification",
        optimization_objective=OPTIMIZATION_OBJECTIVE,
        target_column=TARGET_COLUMN,
        predefined_split_column_name=SPLIT_COLUMN,
        budget_milli_node_hours=BUDGET_MILLI_NODE_HOURS,
    )

    # Step 3: create the endpoint. It consumes no output of the training step.
    endpoint_op = EndpointCreateOp(
        project=PROJECT,
        display_name=DISPLAY_NAME,
        description="Heart Failure AutoML model",
    )

    # Step 4: deploy the trained model to the endpoint on a single replica.
    # `enable_access_logging=True` is deliberately not passed: it was
    # commented out because of a failure.
    ModelDeployOp(
        model=training_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        deployed_model_display_name=DISPLAY_NAME,
        dedicated_resources_machine_type=SERVING_MACHINE_TYPE,
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )


Overwriting ./pipelines/kfp_heart_failure_automl_pipeline.py


### Compile the pipeline

Define the environment variables that will be passed to the pipeline compiler:

In [5]:
# Cloud Storage bucket for pipeline artifacts, and the pipeline root inside it.
ARTIFACT_STORE = f"gs://{PROJECT}-kfp-artifact-store"
PIPELINE_ROOT = f"{ARTIFACT_STORE}/pipeline"
# Alternative `bq://` source, currently disabled:
#DATASET_SOURCE = f"bq://{PROJECT}.covertype_dataset.covertype"
# CSV file that the pipeline registers as its tabular dataset.
DATASET_SOURCE = f"gs://{PROJECT}/heart_failure/scaled-engineered-heart.csv"

# Export the settings as environment variables; the pipeline module written
# above reads PIPELINE_ROOT, PROJECT and DATASET_SOURCE through `os.getenv`.
%env PIPELINE_ROOT={PIPELINE_ROOT}
%env PROJECT={PROJECT}
%env REGION={REGION}
%env DATASET_SOURCE={DATASET_SOURCE}

env: PIPELINE_ROOT=gs://qwiklabs-asl-02-99f66d8df225-kfp-artifact-store/pipeline
env: PROJECT=qwiklabs-asl-02-99f66d8df225
env: REGION=us-central1
env: DATASET_SOURCE=gs://qwiklabs-asl-02-99f66d8df225/heart_failure/scaled-engineered-heart.csv


Check that the `ARTIFACT_STORE` bucket exists, and create it if it does not:

In [6]:
!gsutil ls | grep ^{ARTIFACT_STORE}/$ || gsutil mb -l {REGION} {ARTIFACT_STORE}

gs://qwiklabs-asl-02-99f66d8df225-kfp-artifact-store/


#### Use the CLI compiler to compile the pipeline

Compile the pipeline from the Python file we generated into a JSON description using the following command:

In [7]:
# Path of the compiled pipeline specification (JSON): written by the compile
# step and later passed to the pipeline job as its template.
PIPELINE_JSON = "pipelines/kfp_heart_failure_automl_pipeline.json"

In [8]:
!dsl-compile-v2 --py pipelines/kfp_heart_failure_automl_pipeline.py --output $PIPELINE_JSON



**Note:** You can also use the Python SDK to compile the pipeline:

```python
from kfp.v2 import compiler

compiler.Compiler().compile(
    pipeline_func=create_pipeline, 
    package_path=PIPELINE_JSON,
)

```

The result is the compiled pipeline specification, stored as a JSON file.

In [9]:
# Show the first lines of the compiled pipeline specification.
!head {PIPELINE_JSON}

{
  "pipelineSpec": {
    "components": {
      "comp-automl-tabular-training-job": {
        "executorLabel": "exec-automl-tabular-training-job",
        "inputDefinitions": {
          "artifacts": {
            "dataset": {
              "artifactType": {
                "schemaTitle": "google.VertexDataset",


### Deploy the pipeline package

Questions for class:

I hit this error - The replica workerpool0-0 exited with a non-zero status of 13. To find out more about why your job exited please check the logs: https://console.cloud.google.com/logs/viewer?project=9475810701&resource=ml_job%2Fjob_id%2F3079020273060544512&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%223079020273060544512%22

- How do I restart a pipeline at a failed step?
- How do I get permissions to view this log - https://console.cloud.google.com/logs/viewer?project=9475810701&resource=ml_job%2Fjob_id%2F3079020273060544512&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%223079020273060544512%22
- How do I debug this error: The replica workerpool0-0 exited with a non-zero status of 13.

In [10]:
# Point the Vertex AI SDK at the project and region used throughout this notebook.
aiplatform.init(location=REGION, project=PROJECT)

# Build a pipeline job from the compiled JSON specification, with caching enabled.
pipeline = aiplatform.PipelineJob(
    template_path=PIPELINE_JSON,
    display_name="kfp_heart_failure_automl_pipeline",
    enable_caching=True,
)

# Submit the job; the current pipeline state is printed while it runs.
pipeline.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/9475810701/locations/us-central1/pipelineJobs/kfp-heartfailure-vertex-automl-pipeline-20230205204728
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/9475810701/locations/us-central1/pipelineJobs/kfp-heartfailure-vertex-automl-pipeline-20230205204728')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/kfp-heartfailure-vertex-automl-pipeline-20230205204728?project=9475810701
PipelineJob projects/9475810701/locations/us-central1/pipelineJobs/kfp-heartfailure-vertex-automl-pipeline-20230205204728 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/9475810701/locations/us-central1/pipelineJobs/kfp-heartfailure-vertex-automl-pipeline-20230205204728 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/9475810701/locations/us-central1/pipelineJobs/kfp-heartfailure-vertex-automl-pipeline-202302

Copyright 2021 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.