# 04 - Test and Deploy Training Pipeline to Vertex Pipelines

The purpose of this notebook is to test, deploy, and run the `TFX` pipeline on `Vertex Pipelines`. The notebook covers the following tasks:
1. Run the tests locally.
2. Run the pipeline using `Vertex Pipelines`.
3. Execute the pipeline deployment `CI/CD` steps using `Cloud Build`.

## Setup

### Import libraries

In [1]:
import os
import kfp
import tfx

# `tfx.__version__` is the TFX release (not the TensorFlow release), so label it accordingly.
print("TFX Version:", tfx.__version__)
print("KFP Version:", kfp.__version__)

Tensorflow Version: 1.15.1
KFP Version: 2.5.0


### Setup Google Cloud project

In [2]:
PROJECT = 'supply-chain-twin-349311' # Change to your project id.
REGION = 'us-central1' # Change to your region.
BUCKET = 'test-lora' # Change to your bucket name.
SERVICE_ACCOUNT ="db-migration-genai@supply-chain-twin-349311.iam.gserviceaccount.com"

if PROJECT == "" or PROJECT is None or PROJECT == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT = shell_output[0]
    
if SERVICE_ACCOUNT == "" or SERVICE_ACCOUNT is None or SERVICE_ACCOUNT == "[your-service-account]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.account)' 2>/dev/null
    SERVICE_ACCOUNT = shell_output[0]
    
if BUCKET == "" or BUCKET is None or BUCKET == "[your-bucket-name]":
    # Get your bucket name to GCP projet id
    BUCKET = PROJECT
    # Try to create the bucket if it doesn'exists
    ! gsutil mb -l $REGION gs://$BUCKET
    print("")
    
PARENT = f"projects/{PROJECT}/locations/{REGION}"
    
print("Project ID:", PROJECT)
print("Region:", REGION)
print("Bucket name:", BUCKET)
print("Service Account:", SERVICE_ACCOUNT)
print("Vertex API Parent URI:", PARENT)

Project ID: supply-chain-twin-349311
Region: us-central1
Bucket name: test-lora
Service Account: db-migration-genai@supply-chain-twin-349311.iam.gserviceaccount.com
Vertex API Parent URI: projects/supply-chain-twin-349311/locations/us-central1


### Set configurations

In [3]:
# BigQuery location, dataset and table used by the pipeline.
BQ_LOCATION = 'US'
BQ_DATASET_NAME = 'chi_e2e'  # Change to your BQ dataset name.
BQ_TABLE_NAME = 'chicago_taxitrips_prep'

# Display names: the model name is derived from the dataset name and version,
# and the pipeline name is derived from the model name.
VERSION = 'v09'
DATASET_DISPLAY_NAME = 'chicago-taxi-tips'
MODEL_DISPLAY_NAME = '-'.join([DATASET_DISPLAY_NAME, 'classifier', VERSION])
PIPELINE_NAME = MODEL_DISPLAY_NAME + '-train-pipeline'

# Container image for the CI/CD steps (the name already carries its tag).
CICD_IMAGE_NAME = 'cicd:latest'
CICD_IMAGE_URI = "gcr.io/{}/{}".format(PROJECT, CICD_IMAGE_NAME)
CICD_IMAGE_URI

'gcr.io/supply-chain-twin-349311/cicd:latest'

In [4]:
!rm -r src/raw_schema/.ipynb_checkpoints/

rm: cannot remove 'src/raw_schema/.ipynb_checkpoints/': No such file or directory


## 1. Run the CI/CD steps locally

### Set pipeline configurations for the local run

In [5]:
# Export the settings for the local test run as environment variables.
# Every value is a string; artifacts go under an `e2e_tests` sub-folder of the bucket.
os.environ.update({
    "DATASET_DISPLAY_NAME": DATASET_DISPLAY_NAME,
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": PIPELINE_NAME,
    "PROJECT": PROJECT,
    "REGION": REGION,
    "BQ_LOCATION": BQ_LOCATION,
    "BQ_DATASET_NAME": BQ_DATASET_NAME,
    "BQ_TABLE_NAME": BQ_TABLE_NAME,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/e2e_tests",
    "TRAIN_LIMIT": "1000",
    "TEST_LIMIT": "100",
    "UPLOAD_MODEL": "0",
    "ACCURACY_THRESHOLD": "0.1",
    "BEAM_RUNNER": "DirectRunner",
    "TRAINING_RUNNER": "local",
})

In [6]:
import importlib
from src.tfx_pipelines import config

# Reload so the module is re-evaluated after the environment variables were changed.
importlib.reload(config)

# Print every upper-case (constant-style) attribute of the config module.
for name, setting in vars(config).items():
    if name.isupper():
        print(f'{name}: {setting}')

2024-06-27 07:17:28.954647: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-27 07:17:28.954756: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-27 07:17:28.956929: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-27 07:17:28.967127: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


PROJECT: supply-chain-twin-349311
REGION: us-central1
GCS_LOCATION: gs://test-lora/chicago-taxi-tips/e2e_tests
ARTIFACT_STORE_URI: gs://test-lora/chicago-taxi-tips/e2e_tests/tfx_artifacts
MODEL_REGISTRY_URI: gs://test-lora/chicago-taxi-tips/e2e_tests/model_registry
DATASET_DISPLAY_NAME: chicago-taxi-tips
MODEL_DISPLAY_NAME: chicago-taxi-tips-classifier-v09
PIPELINE_NAME: chicago-taxi-tips-classifier-v09-train-pipeline
ML_USE_COLUMN: ml_use
EXCLUDE_COLUMNS: trip_start_timestamp
TRAIN_LIMIT: 1000
TEST_LIMIT: 100
SERVE_LIMIT: 0
NUM_TRAIN_SPLITS: 4
NUM_EVAL_SPLITS: 1
ACCURACY_THRESHOLD: 0.1
USE_KFP_SA: False
TFX_IMAGE_URI: gcr.io/supply-chain-twin-349311/tfx-chicago-taxi-tips:latest
BEAM_RUNNER: DirectRunner
BEAM_DIRECT_PIPELINE_ARGS: ['--project=supply-chain-twin-349311', '--temp_location=gs://test-lora/chicago-taxi-tips/e2e_tests/temp']
BEAM_DATAFLOW_PIPELINE_ARGS: ['--project=supply-chain-twin-349311', '--temp_location=gs://test-lora/chicago-taxi-tips/e2e_tests/temp', '--region=us-centr

### Run unit tests

In [None]:
pip install pytest

In [None]:
pip install typing_extensions==4.7.1

In [7]:
# Run the data-source unit tests; `-s` turns off pytest's output capture so the tests' printed output is shown.
!py.test src/tests/datasource_utils_tests.py -s

platform linux -- Python 3.10.14, pytest-8.2.2, pluggy-1.5.0
rootdir: /home/jupyter/mlops-with-vertex-ai
plugins: typeguard-4.3.0, anyio-4.4.0, time-machine-2.14.1
collected 2 items                                                              [0m[1m

src/tests/datasource_utils_tests.py BigQuery Source: supply-chain-twin-349311.chi_e2e.chicago_taxitrips_prep
[32m.[0mBigQuery Source: supply-chain-twin-349311.chi_e2e.chicago_taxitrips_prep
[32m.[0m



In [8]:
# Run the model unit tests; `-s` turns off pytest's output capture so the tests' printed output is shown.
!py.test src/tests/model_tests.py -s

platform linux -- Python 3.10.14, pytest-8.2.2, pluggy-1.5.0
rootdir: /home/jupyter/mlops-with-vertex-ai
plugins: typeguard-4.3.0, anyio-4.4.0, time-machine-2.14.1
[1mcollecting ... [0m2024-06-27 07:17:57.907375: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-27 07:17:57.907443: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-27 07:17:57.909202: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-27 07:17:57.918398: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical ope

### Run e2e pipeline test

In [9]:
# Run only `test_e2e_pipeline` from the deployment tests (selected via the `::` node id); `-s` shows its output live.
!py.test src/tests/pipeline_deployment_tests.py::test_e2e_pipeline -s

platform linux -- Python 3.10.14, pytest-8.2.2, pluggy-1.5.0
rootdir: /home/jupyter/mlops-with-vertex-ai
plugins: typeguard-4.3.0, anyio-4.4.0, time-machine-2.14.1
[1mcollecting ... [0m2024-06-27 07:18:05.930147: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-27 07:18:05.930210: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-27 07:18:05.931826: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-27 07:18:05.940504: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical ope

## 2. Run the training pipeline using Vertex Pipelines



### Set the pipeline configurations for the Vertex AI run

In [10]:
# Export the settings for the Vertex AI run as environment variables (all values are strings).
# NOTE(review): UPLOAD_MODEL and ACCURACY_THRESHOLD are not set here, so within the same
# kernel they keep the values exported for the local run — confirm that is intended.
os.environ.update({
    "DATASET_DISPLAY_NAME": DATASET_DISPLAY_NAME,
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": PIPELINE_NAME,
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}",
    "TRAIN_LIMIT": "85000",
    "TEST_LIMIT": "15000",
    "BEAM_RUNNER": "DataflowRunner",
    "TRAINING_RUNNER": "vertex",
    "TFX_IMAGE_URI": f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}",
    "ENABLE_CACHE": "1",
})

In [11]:
import importlib
from src.tfx_pipelines import config

# Reload so the module is re-evaluated with the Vertex AI environment variables.
importlib.reload(config)

# Print every upper-case (constant-style) attribute of the config module.
for name, setting in vars(config).items():
    if name.isupper():
        print(f'{name}: {setting}')

PROJECT: supply-chain-twin-349311
REGION: us-central1
GCS_LOCATION: gs://test-lora/chicago-taxi-tips
ARTIFACT_STORE_URI: gs://test-lora/chicago-taxi-tips/tfx_artifacts
MODEL_REGISTRY_URI: gs://test-lora/chicago-taxi-tips/e2e_tests/model_registry
DATASET_DISPLAY_NAME: chicago-taxi-tips
MODEL_DISPLAY_NAME: chicago-taxi-tips-classifier-v09
PIPELINE_NAME: chicago-taxi-tips-classifier-v09-train-pipeline
ML_USE_COLUMN: ml_use
EXCLUDE_COLUMNS: trip_start_timestamp
TRAIN_LIMIT: 85000
TEST_LIMIT: 15000
SERVE_LIMIT: 0
NUM_TRAIN_SPLITS: 4
NUM_EVAL_SPLITS: 1
ACCURACY_THRESHOLD: 0.1
USE_KFP_SA: False
TFX_IMAGE_URI: gcr.io/supply-chain-twin-349311/chicago-taxi-tips:v09
BEAM_RUNNER: DataflowRunner
BEAM_DIRECT_PIPELINE_ARGS: ['--project=supply-chain-twin-349311', '--temp_location=gs://test-lora/chicago-taxi-tips/temp']
BEAM_DATAFLOW_PIPELINE_ARGS: ['--project=supply-chain-twin-349311', '--temp_location=gs://test-lora/chicago-taxi-tips/temp', '--region=us-central1', '--runner=DataflowRunner']
TRAINING_

### Build the ML container image

This is the `TFX` runtime environment for the training pipeline steps.

In [12]:
!echo $TFX_IMAGE_URI

gcr.io/supply-chain-twin-349311/chicago-taxi-tips:v09


In [13]:
!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=120m --machine-type=e2-highcpu-8

Creating temporary archive of 70 file(s) totalling 52.4 MiB before compression.
Some files were not included in the source upload.

Check the gcloud log [/home/jupyter/.config/gcloud/logs/2024.06.27/07.24.41.006481.log] to see which files and the contents of the
default gcloudignore file used (see `$ gcloud topic gcloudignore` to learn
more).

Uploading tarball of [.] to [gs://supply-chain-twin-349311_cloudbuild/source/1719473081.198917-78831b76eb1846178aecf1692ee0f2c8.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/supply-chain-twin-349311/locations/global/builds/e0bc706b-d65f-4cfb-8b64-4eb0741047b4].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/e0bc706b-d65f-4cfb-8b64-4eb0741047b4?project=1049330678395 ].
Waiting for build to complete. Polling interval: 1 second(s).
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "e0bc706b-d65f-4cfb-8b64-4eb0741047b4"

FETCHSOURCE
Fetching storage object: gs://

### Compile pipeline

In [14]:
from src.tfx_pipelines import runner

# Name the compiled definition after the pipeline; this JSON file is uploaded to
# Cloud Storage and submitted to Vertex Pipelines in the following cells.
pipeline_definition_file = f'{config.PIPELINE_NAME}.json'
# NOTE(review): presumably writes the compiled pipeline spec to `pipeline_definition_file`
# and returns it — confirm against `runner.compile_training_pipeline`.
pipeline_definition = runner.compile_training_pipeline(pipeline_definition_file)

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying etl.py -> build/lib
copying transformations.py -> build/lib


!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()


installing to /var/tmp/tmpwne5cfgy
running install
running install_lib
copying build/lib/etl.py -> /var/tmp/tmpwne5cfgy
copying build/lib/transformations.py -> /var/tmp/tmpwne5cfgy
running install_egg_info
running egg_info
creating tfx_user_code_DataTransformer.egg-info
writing tfx_user_code_DataTransformer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_DataTransformer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_DataTransformer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_DataTransformer.egg-info/SOURCES.txt'
Copying tfx_user_code_DataTransformer.egg-info to /var/tmp/tmpwne5cfgy/tfx_user_code_DataTransformer-0.0+de07c8431e7a29dced215501daf4f187c64541d3189d2529c8a52c51eb6c9d4d-py3.10.egg-info
running install_scripts
creating /var/tmp/tmpwne5cfgy/tfx_user_code_DataTransformer-0.0+de07c8

!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()


installing to /var/tmp/tmp3nf8lmrf
running install
running install_lib
copying build/lib/exporter.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/data.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/model.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/runner.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/trainer.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/task.py -> /var/tmp/tmp3nf8lmrf
copying build/lib/defaults.py -> /var/tmp/tmp3nf8lmrf
running install_egg_info
running egg_info
creating tfx_user_code_ModelTrainer.egg-info
writing tfx_user_code_ModelTrainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_ModelTrainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_ModelTrainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_ModelTrainer.egg-info/SOURCES.txt'
Copying tfx_user_code_ModelTrainer.egg-info to 

In [15]:
# Cloud Storage folder for compiled pipeline definitions (trailing slash: copy into the folder).
PIPELINES_STORE = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/compiled_pipelines/"
# Upload the compiled JSON; `{...}` interpolates the Python variables into the shell command.
!gsutil cp {pipeline_definition_file} {PIPELINES_STORE}

Copying file://chicago-taxi-tips-classifier-v09-train-pipeline.json [Content-Type=application/json]...
/ [1 files][ 26.8 KiB/ 26.8 KiB]                                                
Operation completed over 1 objects/26.8 KiB.                                     


In [16]:
from google.cloud import aiplatform

# Set the SDK-wide default project and region. `aiplatform.init()` returns None,
# so its result is not bound to a (misleading) client variable.
aiplatform.init(project=PROJECT, location=REGION)

# Create a run of the compiled pipeline definition with the training hyperparameters.
job = aiplatform.PipelineJob(
    display_name='chicago-taxi-tips',
    template_path=pipeline_definition_file,
    parameter_values={
        'learning_rate': 0.003,
        'batch_size': 512,
        'hidden_units': '128,128',  # comma-separated layer sizes, passed as a string
        'num_epochs': 30,
    },
)
# Submit the run to Vertex Pipelines; the SDK logs the resource name and console link.
job.submit()

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/1049330678395/locations/us-central1/pipelineJobs/chicago-taxi-tips-classifier-v09-train-pipeline-20240627084403
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/1049330678395/locations/us-central1/pipelineJobs/chicago-taxi-tips-classifier-v09-train-pipeline-20240627084403')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/chicago-taxi-tips-classifier-v09-train-pipeline-20240627084403?project=1049330678395


### Extracting pipeline runs metadata

In [17]:
# Show the pipeline name that the run-metadata lookup in the next cell filters on.
print(PIPELINE_NAME)

chicago-taxi-tips-classifier-v09-train-pipeline


In [18]:
from google.cloud import aiplatform as vertex_ai

# Fetch the run metadata for the pipeline and keep only the rows whose
# `pipeline_name` matches exactly.
pipeline_df = (
    vertex_ai.get_pipeline_df(PIPELINE_NAME)
    .loc[lambda runs: runs["pipeline_name"] == PIPELINE_NAME]
)
# Transpose so each run is a column and each parameter is a row.
pipeline_df.T

Unnamed: 0,0,1,2,3
pipeline_name,chicago-taxi-tips-classifier-v09-train-pipeline,chicago-taxi-tips-classifier-v09-train-pipeline,chicago-taxi-tips-classifier-v09-train-pipeline,chicago-taxi-tips-classifier-v09-train-pipeline
run_name,chicago-taxi-tips-classifier-v09-train-pipelin...,chicago-taxi-tips-classifier-v09-train-pipelin...,chicago-taxi-tips-classifier-v09-train-pipelin...,chicago-taxi-tips-classifier-v09-train-pipelin...
param.input:hidden_units,128128,128128,128128,128128
param.input:batch_size,512,512,512,512
param.input:learning_rate,0.003,0.003,0.003,0.003
param.input:num_epochs,30,30,30,30
param.vmlmd_lineage_integration,{'pipeline_run_component': {'location_id': 'us...,{'pipeline_run_component': {'parent_task_names...,{'pipeline_run_component': {'parent_task_names...,{'pipeline_run_component': {'task_name': 'chic...


## 3. Execute the pipeline deployment CI/CD steps in Cloud Build

The CI/CD routine is defined in the [pipeline-deployment.yaml](build/pipeline-deployment.yaml) file, and consists of the following steps:
1. Clone the repository to the build environment.
2. Run unit tests.
3. Run a local e2e test of the pipeline.
4. Build the ML container image for pipeline steps.
5. Compile the pipeline.
6. Upload the pipeline to Cloud Storage.

### Build the CI/CD container image for Cloud Build

This is the runtime environment where the steps of testing and deploying the pipeline will be executed.

In [None]:
ls -R

In [None]:
# Show the CI/CD image URI that the Cloud Build cell below uses as the image tag.
print(CICD_IMAGE_URI)

In [21]:
# Show the value as the shell receives it: IPython expands the Python variable into the command.
!echo $CICD_IMAGE_URI

gcr.io/supply-chain-twin-349311/cicd:latest


In [22]:
# Build the CI/CD image from the `build/` directory with Cloud Build and tag it with CICD_IMAGE_URI.
!gcloud builds submit --tag $CICD_IMAGE_URI build/. --timeout=120m --machine-type=e2-highcpu-8

Creating temporary archive of 9 file(s) totalling 26.8 KiB before compression.
Uploading tarball of [build/.] to [gs://supply-chain-twin-349311_cloudbuild/source/1719496246.867131-112571afb80e469e8c0f76ff3c2b9cc5.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/supply-chain-twin-349311/locations/global/builds/63c7c485-2b52-4e3f-a5cc-941a1ea5ad84].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/63c7c485-2b52-4e3f-a5cc-941a1ea5ad84?project=1049330678395 ].
Waiting for build to complete. Polling interval: 1 second(s).
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "63c7c485-2b52-4e3f-a5cc-941a1ea5ad84"

FETCHSOURCE
Fetching storage object: gs://supply-chain-twin-349311_cloudbuild/source/1719496246.867131-112571afb80e469e8c0f76ff3c2b9cc5.tgz#1719496247243495
Copying gs://supply-chain-twin-349311_cloudbuild/source/1719496246.867131-112571afb80e469e8c0f76ff3c2b9cc5.tgz#1719496247243495...
/ [1 files][  6

### Run CI/CD for pipeline deployment using Cloud Build

In [27]:
REPO_URL = "https://github.com/Jashuva-07/mlops.git" # Change to your github repo.
BRANCH = "main"

GCS_LOCATION = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/"
TEST_GCS_LOCATION = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/e2e_tests"
CI_TRAIN_LIMIT = 1000
CI_TEST_LIMIT = 100
CI_UPLOAD_MODEL = 0
CI_ACCURACY_THRESHOLD = 0.1
BEAM_RUNNER = "DataflowRunner"
TRAINING_RUNNER = "vertex"
VERSION = 'tfx-1.2'
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-train-pipeline'
PIPELINES_STORE = os.path.join(GCS_LOCATION, "compiled_pipelines")

TFX_IMAGE_URI = f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}"

SUBSTITUTIONS=f"""\
_REPO_URL='{REPO_URL}',\
_BRANCH={BRANCH},\
_CICD_IMAGE_URI={CICD_IMAGE_URI},\
_PROJECT={PROJECT},\
_REGION={REGION},\
_GCS_LOCATION={GCS_LOCATION},\
_TEST_GCS_LOCATION={TEST_GCS_LOCATION},\
_BQ_LOCATION={BQ_LOCATION},\
_BQ_DATASET_NAME={BQ_DATASET_NAME},\
_BQ_TABLE_NAME={BQ_TABLE_NAME},\
_DATASET_DISPLAY_NAME={DATASET_DISPLAY_NAME},\
_MODEL_DISPLAY_NAME={MODEL_DISPLAY_NAME},\
_CI_TRAIN_LIMIT={CI_TRAIN_LIMIT},\
_CI_TEST_LIMIT={CI_TEST_LIMIT},\
_CI_UPLOAD_MODEL={CI_UPLOAD_MODEL},\
_CI_ACCURACY_THRESHOLD={CI_ACCURACY_THRESHOLD},\
_BEAM_RUNNER={BEAM_RUNNER},\
_TRAINING_RUNNER={TRAINING_RUNNER},\
_TFX_IMAGE_URI={TFX_IMAGE_URI},\
_PIPELINE_NAME={PIPELINE_NAME},\
_PIPELINES_STORE={PIPELINES_STORE}\
"""

!echo $SUBSTITUTIONS

_REPO_URL=https://github.com/Jashuva-07/mlops.git,_BRANCH=main,_CICD_IMAGE_URI=gcr.io/supply-chain-twin-349311/cicd:latest,_PROJECT=supply-chain-twin-349311,_REGION=us-central1,_GCS_LOCATION=gs://test-lora/chicago-taxi-tips/,_TEST_GCS_LOCATION=gs://test-lora/chicago-taxi-tips/e2e_tests,_BQ_LOCATION=US,_BQ_DATASET_NAME=chi_e2e,_BQ_TABLE_NAME=chicago_taxitrips_prep,_DATASET_DISPLAY_NAME=chicago-taxi-tips,_MODEL_DISPLAY_NAME=chicago-taxi-tips-classifier-v09,_CI_TRAIN_LIMIT=1000,_CI_TEST_LIMIT=100,_CI_UPLOAD_MODEL=0,_CI_ACCURACY_THRESHOLD=0.1,_BEAM_RUNNER=DataflowRunner,_TRAINING_RUNNER=vertex,_TFX_IMAGE_URI=gcr.io/supply-chain-twin-349311/chicago-taxi-tips:tfx-1.2,_PIPELINE_NAME=chicago-taxi-tips-classifier-v09-train-pipeline,_PIPELINES_STORE=gs://test-lora/chicago-taxi-tips/compiled_pipelines


In [28]:
# Run the CI/CD routine defined in build/pipeline-deployment.yaml without uploading local source (`--no-source`).
# `{SUBSTITUTIONS}` interpolates the comma-separated KEY=VALUE string built in the previous cell.
!gcloud builds submit --no-source --timeout=120m --config build/pipeline-deployment.yaml --substitutions {SUBSTITUTIONS} --machine-type=e2-highcpu-8

Created [https://cloudbuild.googleapis.com/v1/projects/supply-chain-twin-349311/locations/global/builds/14392ebb-365d-4711-9b45-c408770e3a8a].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/14392ebb-365d-4711-9b45-c408770e3a8a?project=1049330678395 ].
Waiting for build to complete. Polling interval: 1 second(s).
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "14392ebb-365d-4711-9b45-c408770e3a8a"

FETCHSOURCE
BUILD
Starting Step #0 - "Clone Repository"
Step #0 - "Clone Repository": Already have image (with digest): gcr.io/cloud-builders/git
Step #0 - "Clone Repository": Cloning into 'mlops'...
Step #0 - "Clone Repository": fatal: could not read Username for 'https://github.com': No such device or address
Finished Step #0 - "Clone Repository"
ERROR
ERROR: build step 0 "gcr.io/cloud-builders/git" failed: step exited with non-zero status: 128
----------------------------------------------------------------------