# 04 - Test and Deploy Training Pipeline to Vertex Pipelines

The purpose of this notebook is to test, deploy, and run the `TFX` pipeline on `Vertex Pipelines`. The notebook covers the following tasks:
1. Run the tests locally.
2. Run the pipeline using `Vertex Pipelines`.
3. Execute the pipeline deployment `CI/CD` steps using `Cloud Build`.

## Setup

### Import libraries

In [1]:
# Libraries used throughout the notebook: KFP and the TFX v1 API.
import os
import kfp
from tfx import v1 as tfx

# Record the installed versions next to the results for reproducibility.
print("TFX Version:", tfx.__version__)
print("KFP Version:", kfp.__version__)

2024-02-01 13:44:43.723399: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using TensorFlow backend
TFX Version: 1.14.0
KFP Version: 1.8.22


### Setup Google Cloud project

In [2]:
PROJECT = 'stellar-orb-408015' # Change to your project id.
REGION = 'us-central1' # Change to your region.
BUCKET =  'stellar-orb-408015-bucket' # Change to your bucket name.
SERVICE_ACCOUNT = "pipelines-sa@stellar-orb-408015.iam.gserviceaccount.com"

if PROJECT == "" or PROJECT is None or PROJECT == "stellar-orb-408015":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT = shell_output[0]
    
if SERVICE_ACCOUNT == "" or SERVICE_ACCOUNT is None or SERVICE_ACCOUNT == "pipelines-sa@stellar-orb-408015.iam.gserviceaccount.com":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.account)' 2>/dev/null
    SERVICE_ACCOUNT = shell_output[0]
    
if BUCKET == "" or BUCKET is None or BUCKET == "stellar-orb-408015-bucket":
    # Get your bucket name to GCP project id
    BUCKET = PROJECT
    # Try to create the bucket if it doesn't exists
    ! gsutil mb -l $REGION gs://$BUCKET
    print("")
    
print("Project ID:", PROJECT)
print("Region:", REGION)
print("Bucket name:", BUCKET)
print("Service Account:", SERVICE_ACCOUNT)

Creating gs://stellar-orb-408015/...
ServiceException: 409 A Cloud Storage bucket named 'stellar-orb-408015' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.

Project ID: stellar-orb-408015
Region: us-central1
Bucket name: stellar-orb-408015
Service Account: 13110252891-compute@developer.gserviceaccount.com


### Set configurations

In [3]:
# BigQuery source table holding the training data.
BQ_LOCATION = 'US'
BQ_DATASET_NAME = 'playground_us' # Change to your BQ dataset name.
BQ_TABLE_NAME = 'chicago_taxitrips_final'

# Display names; the model and pipeline names are derived from the dataset name and version.
VERSION = 'v01'
DATASET_DISPLAY_NAME = 'chicago-taxi-tips'
MODEL_DISPLAY_NAME = f'{DATASET_DISPLAY_NAME}-classifier-{VERSION}'
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-train-pipeline'

# Container image used as the Cloud Build CI/CD runtime (built in section 3).
CICD_IMAGE_NAME = 'cicd:latest'
CICD_IMAGE_URI = f"gcr.io/{PROJECT}/{CICD_IMAGE_NAME}"

In [None]:
!rm -r src/raw_schema/.ipynb_checkpoints/

## 1. Run the CI/CD steps locally

### Set pipeline configurations for the local run

In [None]:
import os

# Pipeline settings for the local run, exported as environment variables in a
# single update. Limits and thresholds are deliberately tiny so the tests
# finish quickly; the larger values are kept in the trailing comments.
os.environ.update({
    "DATASET_DISPLAY_NAME": DATASET_DISPLAY_NAME,
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": f'{DATASET_DISPLAY_NAME}-pipeline-v01-',
    "PROJECT": PROJECT,
    "REGION": REGION,
    "BQ_LOCATION": BQ_LOCATION,
    "BQ_DATASET_NAME": BQ_DATASET_NAME,
    "BQ_TABLE_NAME": BQ_TABLE_NAME,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/e2e_tests",
    "TRAIN_LIMIT": "10",  # "1000"
    "TEST_LIMIT": "2",  # "100"
    "UPLOAD_MODEL": "0",
    "ACCURACY_THRESHOLD": "0.1",
    "BEAM_RUNNER": "DirectRunner",
    "TRAINING_RUNNER": "local",
    "ENDPOINT_NAME": 'predict-explain-for-' + f'{DATASET_DISPLAY_NAME}-pipeline-v01-',
})

In [25]:
import importlib
from src.tfx_pipelines import config

# Re-import the config module so it reflects the environment variables set above.
importlib.reload(config)

# Print every upper-case (constant-style) attribute of the config module.
for name, setting in vars(config).items():
    if not name.isupper():
        continue
    print(f'{name}: {setting}')


PROJECT: stellar-orb-408015
REGION: us-central1
GCS_LOCATION: gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests
ARTIFACT_STORE_URI: gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests/tfx_artifacts
MODEL_REGISTRY_URI: gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests/model_registry
DATASET_DISPLAY_NAME: chicago-taxi-tips
MODEL_DISPLAY_NAME: chicago-taxi-tips-classifier-v01
PIPELINE_NAME: chicago-taxi-tips-pipeline-v01-
PIPELINE_ROOT: gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests/tfx_artifacts/chicago-taxi-tips-pipeline-v01-
PIPELINE_DEFINITION_FILE: chicago-taxi-tips-pipeline-v01-info_pipeline.json
ML_USE_COLUMN: ml_use
EXCLUDE_COLUMNS: trip_start_timestamp
TRAIN_LIMIT: 10
TEST_LIMIT: 2
SERVE_LIMIT: 0
NUM_TRAIN_SPLITS: 4
NUM_EVAL_SPLITS: 1
ACCURACY_THRESHOLD: 0.1
USE_KFP_SA: False
TFX_IMAGE_URI: 
BEAM_RUNNER: DirectRunner
BEAM_DIRECT_PIPELINE_ARGS: ['--project=stellar-orb-408015', '--temp_location=gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests/temp']
BEAM_DATAFLOW_PIPELI

### Run unit tests

In [8]:
!pip install typing_extensions==4.7.1 --upgrade

Collecting typing_extensions==4.7.1
  Downloading typing_extensions-4.7.1-py3-none-any.whl.metadata (3.1 kB)
Downloading typing_extensions-4.7.1-py3-none-any.whl (33 kB)
Installing collected packages: typing_extensions
  Attempting uninstall: typing_extensions
    Found existing installation: typing_extensions 4.5.0
    Uninstalling typing_extensions-4.5.0:
      Successfully uninstalled typing_extensions-4.5.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastapi 0.104.1 requires typing-extensions>=4.8.0, but you have typing-extensions 4.7.1 which is incompatible.
tensorflow 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.7.1 which is incompatible.[0m[31m
[0mSuccessfully installed typing_extensions-4.7.1


In [9]:
# Run the data source utility tests; -s disables output capture so the tests' prints are shown.
!pytest src/tests/datasource_utils_tests.py -s

platform linux -- Python 3.10.13, pytest-8.0.0, pluggy-1.3.0
rootdir: /home/jupyter/mlops-with-vertex-ai-steps
plugins: typeguard-4.1.5, anyio-3.7.1
collected 2 items                                                              [0m[1m

src/tests/datasource_utils_tests.py BigQuery Source: stellar-orb-408015.playground_us.chicago_taxitrips_final
[32m.[0mBigQuery Source: stellar-orb-408015.playground_us.chicago_taxitrips_final
[32m.[0m

../../../opt/conda/lib/python3.10/site-packages/google/rpc/__init__.py:18
    import pkg_resources

  Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
    declare_namespace(pkg)

  Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  

In [22]:
!py.test src/tests/model_tests.py -s

platform linux -- Python 3.10.13, pytest-8.0.0, pluggy-1.3.0
rootdir: /home/jupyter/mlops-with-vertex-ai-steps
plugins: typeguard-4.1.5, anyio-3.7.1
[1mcollecting ... [0m2024-01-31 16:24:45.884715: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
collected 2 items                                                              [0m[1m

src/tests/model_tests.py [32m.[0mModel: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 trip_month_xf (InputLayer)  [(None,)]                    0         []                            
                                                                                

### Run e2e pipeline test

In [26]:
!py.test src/tests/pipeline_deployment_tests.py::test_e2e_pipeline -s

platform linux -- Python 3.10.13, pytest-8.0.0, pluggy-1.3.0
rootdir: /home/jupyter/mlops-with-vertex-ai-steps
plugins: typeguard-4.1.5, anyio-3.7.1
[1mcollecting ... [0m2024-01-31 16:32:34.389665: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using TensorFlow backend
TFX Version: 1.14.0
Tensorflow Version: 2.13.1
collected 1 item                                                               [0m[1m

src/tests/pipeline_deployment_tests.py upload_model: 0
Pipeline e2e test artifacts stored in: gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests
ML metadata store is ready.
Excluding no splits because exclude_splits is not set.
Excluding no splits because exclude_splits is not set.
Pipeline components: ['HyperparamsGen', 'TrainDataGen', 'TestDataGen', 

## 2. Run the training pipeline using Vertex Pipelines



### Set the pipeline configurations for the Vertex AI run

In [4]:
from src.tfx_pipelines import config

# Pipeline settings for the Vertex AI run, exported as environment variables
# in a single update.
# NOTE(review): variables set only by the local-run cell (e.g. UPLOAD_MODEL,
# ACCURACY_THRESHOLD, BQ_*) are not reset here, so they persist if both cells
# run in the same kernel -- confirm whether that is intended.
os.environ.update({
    "DATASET_DISPLAY_NAME": DATASET_DISPLAY_NAME,
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": f'{DATASET_DISPLAY_NAME}-pipeline-v01-',
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/source",
    "TRAIN_LIMIT": "10",  # "85000"
    "TEST_LIMIT": "2",  # "15000"
    "BEAM_RUNNER": "DataflowRunner",
    "TRAINING_RUNNER": "vertex",
    "TFX_IMAGE_URI": f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:03",
    "ENABLE_CACHE": "1",
    "ENDPOINT_NAME": 'predict-explain-for-' + f'{DATASET_DISPLAY_NAME}-pipeline',
})

In [5]:
import importlib
from src.tfx_pipelines import config

# Re-import the config module so it reflects the Vertex AI run settings set above.
importlib.reload(config)

# Print every upper-case (constant-style) attribute of the config module.
for name, setting in vars(config).items():
    if not name.isupper():
        continue
    print(f'{name}: {setting}')

PROJECT: stellar-orb-408015
REGION: us-central1
GCS_LOCATION: gs://stellar-orb-408015/chicago-taxi-tips/source
ARTIFACT_STORE_URI: gs://stellar-orb-408015/chicago-taxi-tips/source/tfx_artifacts
MODEL_REGISTRY_URI: model_registry
DATASET_DISPLAY_NAME: chicago-taxi-tips
MODEL_DISPLAY_NAME: chicago-taxi-tips-classifier-v01
PIPELINE_NAME: chicago-taxi-tips-pipeline-v01-
PIPELINE_ROOT: gs://stellar-orb-408015/chicago-taxi-tips/source/tfx_artifacts/chicago-taxi-tips-pipeline-v01-
PIPELINE_DEFINITION_FILE: chicago-taxi-tips-pipeline-v01-info_pipeline.json
ML_USE_COLUMN: ml_use
EXCLUDE_COLUMNS: trip_start_timestamp
TRAIN_LIMIT: 10
TEST_LIMIT: 2
SERVE_LIMIT: 0
NUM_TRAIN_SPLITS: 4
NUM_EVAL_SPLITS: 1
ACCURACY_THRESHOLD: 0.8
USE_KFP_SA: False
TFX_IMAGE_URI: gcr.io/stellar-orb-408015/chicago-taxi-tips:03
BEAM_RUNNER: DataflowRunner
BEAM_DIRECT_PIPELINE_ARGS: ['--project=stellar-orb-408015', '--temp_location=gs://stellar-orb-408015/chicago-taxi-tips/source/temp']
BEAM_DATAFLOW_PIPELINE_ARGS: ['--pro

### Build the ML container image

This is the `TFX` runtime environment for the training pipeline steps.

In [6]:
!echo $TFX_IMAGE_URI

gcr.io/stellar-orb-408015/chicago-taxi-tips:03


In [None]:
!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=2h

### Compile pipeline

In [7]:
from src.tfx_pipelines import runner

# Compile the training pipeline; the result is kept for inspection.
pipeline_definition = runner.compile_training_pipeline()

TFX Version: 1.14.0
Tensorflow Version: 2.13.1
running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying etl.py -> build/lib
copying transformations.py -> build/lib
installing to /var/tmp/tmpnha_c6h0
running install
running install_lib
copying build/lib/etl.py -> /var/tmp/tmpnha_c6h0
copying build/lib/transformations.py -> /var/tmp/tmpnha_c6h0
running install_egg_info
running egg_info
creating tfx_user_code_DataTransformer.egg-info


!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()


writing tfx_user_code_DataTransformer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_DataTransformer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_DataTransformer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
Copying tfx_user_code_DataTransformer.egg-info to /var/tmp/tmpnha_c6h0/tfx_user_code_DataTransformer-0.0+de07c8431e7a29dced215501daf4f187c64541d3189d2529c8a52c51eb6c9d4d-py3.10.egg-info
running install_scripts
creating /var/tmp/tmpnha_c6h0/tfx_user_code_DataTransformer-0.0+de07c8431e7a29dced215501daf4f187c64541d3189d2529c8a52c51eb6c9d4d.dist-info/WHEEL
creating '/var/tmp/tmp5r8x7bnl/tfx_user_code_DataTransformer-0.0+de07c8431e7a29dced215501daf4f187c64541d3189d2529c8a52c51eb6c9d4d-py3-none-any.whl' and adding '/var/tmp/tmpnha_c6h0' to it
adding '

!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()


writing tfx_user_code_ModelTrainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_ModelTrainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_ModelTrainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
Copying tfx_user_code_ModelTrainer.egg-info to /var/tmp/tmppzjf4ojl/tfx_user_code_ModelTrainer-0.0+b2e2657f2c58d6b17ae2108a61517fbe1be6ffa4a912f5b2d0055ab70b96bd4c-py3.10.egg-info
running install_scripts
creating /var/tmp/tmppzjf4ojl/tfx_user_code_ModelTrainer-0.0+b2e2657f2c58d6b17ae2108a61517fbe1be6ffa4a912f5b2d0055ab70b96bd4c.dist-info/WHEEL
creating '/var/tmp/tmpaexp4np7/tfx_user_code_ModelTrainer-0.0+b2e2657f2c58d6b17ae2108a61517fbe1be6ffa4a912f5b2d0055ab70b96bd4c-py3-none-any.whl' and adding '/var/tmp/tmppzjf4ojl' to it
adding 'defaults.py'
adding 'features.

In [8]:
# Cloud Storage folder that holds compiled pipeline definitions.
PIPELINES_STORE = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/compiled_pipelines/"
# Copy the compiled pipeline definition file to that folder.
!gsutil cp {config.PIPELINE_DEFINITION_FILE} {PIPELINES_STORE}

Copying file://chicago-taxi-tips-pipeline-v01-info_pipeline.json [Content-Type=application/json]...
/ [1 files][ 31.0 KiB/ 31.0 KiB]                                                
Operation completed over 1 objects/31.0 KiB.                                     


### Submit run to Vertex Pipelines

In [9]:
# Submit the pipeline run; the output below shows the created Vertex AI PipelineJob.
runner.submit_pipeline()

Creating PipelineJob


INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob


PipelineJob created. Resource name: projects/13110252891/locations/us-central1/pipelineJobs/chicago-taxi-tips-pipeline-v01-20240201105706


INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/13110252891/locations/us-central1/pipelineJobs/chicago-taxi-tips-pipeline-v01-20240201105706


To use this PipelineJob in another session:


INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:


pipeline_job = aiplatform.PipelineJob.get('projects/13110252891/locations/us-central1/pipelineJobs/chicago-taxi-tips-pipeline-v01-20240201105706')


INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/13110252891/locations/us-central1/pipelineJobs/chicago-taxi-tips-pipeline-v01-20240201105706')


View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/chicago-taxi-tips-pipeline-v01-20240201105706?project=13110252891


INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/chicago-taxi-tips-pipeline-v01-20240201105706?project=13110252891


### Extracting pipeline runs metadata

In [10]:
from google.cloud import aiplatform as vertex_ai

# Fetch the run metadata for this pipeline, keep only the rows whose
# pipeline_name matches exactly, and show them transposed (one column per run).
runs_df = vertex_ai.get_pipeline_df(config.PIPELINE_NAME)
name_matches = runs_df["pipeline_name"] == config.PIPELINE_NAME
pipeline_df = runs_df[name_matches]
pipeline_df.T

Unnamed: 0,0
pipeline_name,chicago-taxi-tips-pipeline-v01-
run_name,chicago-taxi-tips-pipeline-v01-20240201105706
param.vmlmd_lineage_integration,{'pipeline_run_component': {'pipeline_run_id':...
param.input:num_epochs,30
param.input:learning_rate,0.003
param.input:batch_size,512


## 3. Execute the pipeline deployment CI/CD steps in Cloud Build

The CI/CD routine is defined in the [pipeline-deployment.yaml](build/pipeline-deployment.yaml) file, and consists of the following steps:
1. Clone the repository to the build environment.
2. Run unit tests.
3. Run a local e2e test of the pipeline.
4. Build the ML container image for pipeline steps.
5. Compile the pipeline.
6. Upload the pipeline to Cloud Storage.

### Build CI/CD container Image for Cloud Build

This is the runtime environment where the steps of testing and deploying the pipeline will be executed.

In [4]:
# Show the URI of the CI/CD container image (IPython expands the Python variable).
!echo $CICD_IMAGE_URI

gcr.io/stellar-orb-408015/cicd:latest


In [13]:
# Build the CI/CD container image with Cloud Build, using the build/ directory as source.
!gcloud builds submit --tag $CICD_IMAGE_URI build/. --timeout=3h

Creating temporary tarball archive of 11 file(s) totalling 29.1 KiB before compression.
Uploading tarball of [build/.] to [gs://stellar-orb-408015_cloudbuild/source/1706786804.361925-54d3a9151f3e4645bfa1c49c1781a36b.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/stellar-orb-408015/locations/global/builds/8ff3f9f2-3eae-4a7c-8258-c8099588d430].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/8ff3f9f2-3eae-4a7c-8258-c8099588d430?project=13110252891 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "8ff3f9f2-3eae-4a7c-8258-c8099588d430"

FETCHSOURCE
Fetching storage object: gs://stellar-orb-408015_cloudbuild/source/1706786804.361925-54d3a9151f3e4645bfa1c49c1781a36b.tgz#1706786804603102
Copying gs://stellar-orb-408015_cloudbuild/source/1706786804.361925-54d3a9151f3e4645bfa1c49c1781a36b.tgz#1706786804603102...
/ [1 files][  6.0 KiB/  6.0 KiB]                                                
Operation com

### Run the pipeline deployment CI/CD routine using Cloud Build

In [16]:
REPO_URL = "https://github.com/Saoussen-CH/mlops-with-vertex-ai-steps.git" # Change to your github repo.
BRANCH = "main"
GCS_LOCATION = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/"
TEST_GCS_LOCATION = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/e2e_tests"
CI_TRAIN_LIMIT = 10
CI_TEST_LIMIT = 2
CI_ACCURACY_THRESHOLD = 0.1
BEAM_RUNNER = "DataflowRunner"
TRAINING_RUNNER = "vertex"
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-train-pipeline'
MODEL_DISPLAY_NAME = f'{DATASET_DISPLAY_NAME}-classifier-{VERSION}'
ENDPOINT_NAME_TEST = f'predict-explain-on-{DATASET_DISPLAY_NAME}-classifier_local-test'
ENDPOINT_NAME_PROD = f'predict-explain-on-{DATASET_DISPLAY_NAME}-classifier-prod'
BQ_LOCATION = 'US'
BQ_DATASET_NAME = 'playground_us' # Change to your BQ dataset name.
BQ_TABLE_NAME = 'chicago_taxitrips_final'
VERSION = 'v01'
DATASET_DISPLAY_NAME = 'chicago-taxi-tips'
MODEL_DISPLAY_NAME = f'{DATASET_DISPLAY_NAME}-classifier-{VERSION}'
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-train-pipeline'
CICD_IMAGE_NAME = 'cicd:latest'
CICD_IMAGE_URI = f"gcr.io/{PROJECT}/{CICD_IMAGE_NAME}"
TFX_IMAGE_URI = f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:04"

SUBSTITUTIONS=f"""\
_REPO_URL='{REPO_URL}',\
_BRANCH={BRANCH},\
_CICD_IMAGE_URI={CICD_IMAGE_URI},\
_PROJECT={PROJECT},\
_REGION={REGION},\
_GCS_LOCATION={GCS_LOCATION},\
_TEST_GCS_LOCATION={TEST_GCS_LOCATION},\
_BQ_LOCATION={BQ_LOCATION},\
_BQ_DATASET_NAME={BQ_DATASET_NAME},\
_BQ_TABLE_NAME={BQ_TABLE_NAME},\
_DATASET_DISPLAY_NAME={DATASET_DISPLAY_NAME},\
_MODEL_DISPLAY_NAME={MODEL_DISPLAY_NAME},\
_CI_TRAIN_LIMIT={CI_TRAIN_LIMIT},\
_CI_TEST_LIMIT={CI_TEST_LIMIT},\
_CI_ACCURACY_THRESHOLD={CI_ACCURACY_THRESHOLD},\
_BEAM_RUNNER={BEAM_RUNNER},\
_TRAINING_RUNNER={TRAINING_RUNNER},\
_TFX_IMAGE_URI={TFX_IMAGE_URI},\
_PIPELINE_NAME={PIPELINE_NAME},\
_ENDPOINT_NAME_PROD={ENDPOINT_NAME_PROD}\,
_ENDPOINT_NAME_TEST={ENDPOINT_NAME_TEST}\
"""

!echo $SUBSTITUTIONS

_REPO_URL=https://github.com/Saoussen-CH/mlops-with-vertex-ai-steps.git,_BRANCH=main,_CICD_IMAGE_URI=gcr.io/stellar-orb-408015/cicd:latest,_PROJECT=stellar-orb-408015,_REGION=us-central1,_GCS_LOCATION=gs://stellar-orb-408015/chicago-taxi-tips/,_TEST_GCS_LOCATION=gs://stellar-orb-408015/chicago-taxi-tips/e2e_tests,_BQ_LOCATION=US,_BQ_DATASET_NAME=playground_us,_BQ_TABLE_NAME=chicago_taxitrips_final,_DATASET_DISPLAY_NAME=chicago-taxi-tips,_MODEL_DISPLAY_NAME=chicago-taxi-tips-classifier-v01,_CI_TRAIN_LIMIT=10,_CI_TEST_LIMIT=2,_CI_ACCURACY_THRESHOLD=0.1,_BEAM_RUNNER=DataflowRunner,_TRAINING_RUNNER=vertex,_TFX_IMAGE_URI=gcr.io/stellar-orb-408015/chicago-taxi-tips:04,_PIPELINE_NAME=chicago-taxi-tips-classifier-v01-train-pipeline,_ENDPOINT_NAME_PROD=predict-explain-on-chicago-taxi-tips-classifier-prod,


In [17]:
# Submit the CI/CD build defined in build/pipeline-deployment.yaml without
# uploading local source (--no-source), passing the SUBSTITUTIONS string built above.
!gcloud builds submit --no-source --timeout=7h --config build/pipeline-deployment.yaml --substitutions {SUBSTITUTIONS} 

[1;31mERROR:[0m (gcloud.builds.submit) parsing build/pipeline-deployment.yaml: while parsing a block mapping
  in "build/pipeline-deployment.yaml", line 19, column 1
expected <block end>, but found '-'
  in "build/pipeline-deployment.yaml", line 109, column 1
