# This project uses a Qwiklabs account, rather than a personal Google Cloud account, to access GCP features

## Setting Path

In [None]:
# Use the latest version of pip.
!pip install --upgrade pip
!pip install --upgrade "tfx[kfp]<2"

In [None]:
# Import each library and report its version right away, so a broken install
# is visible at the exact library that failed to import.
import tensorflow as tf
print(f'TensorFlow version: {tf.__version__}')

from tfx import v1 as tfx
print(f'TFX version: {tfx.__version__}')

import kfp
print(f'KFP version: {kfp.__version__}')

In [None]:
# Project-level settings used by the rest of the notebook.
# GOOGLE_CLOUD_PROJECT is left blank here; fill in your project ID before
# running the cells below.
GOOGLE_CLOUD_PROJECT = ''
# Region passed to the Vertex AI client further down.
GOOGLE_CLOUD_REGION = 'us-east1'
# Bucket name is derived from the project ID: "<project>-gcs".
GCS_BUCKET_NAME = f'{GOOGLE_CLOUD_PROJECT}-gcs'

In [None]:
# Set the active gcloud project to GOOGLE_CLOUD_PROJECT. IPython substitutes
# the {GOOGLE_CLOUD_PROJECT} placeholder with the Python variable's value
# before running the shell command, so the variable must be non-empty here.
!gcloud config set project {GOOGLE_CLOUD_PROJECT}

In [None]:
# Name of the pipeline; also used as the last path component of every
# location defined below.
PIPELINE_NAME = 'vertex-pipelines'

# All locations live in the GCS_BUCKET_NAME bucket, one top-level folder per
# purpose, each with a sub-folder named after the pipeline.

# Root for the pipeline's artifacts.
PIPELINE_ROOT = f'gs://{GCS_BUCKET_NAME}/pipeline_root/{PIPELINE_NAME}'

# Location of the user-provided Python modules.
MODULE_ROOT = f'gs://{GCS_BUCKET_NAME}/pipeline_module/{PIPELINE_NAME}'

# Location of the input data.
DATA_ROOT = f'gs://{GCS_BUCKET_NAME}/data/{PIPELINE_NAME}'

# Destination the model is pushed to for serving.
SERVING_MODEL_DIR = f'gs://{GCS_BUCKET_NAME}/serving_model/{PIPELINE_NAME}'

# Path of the metadata database file handed to the pipeline factory.
_METADATA_PATH = f'gs://{GCS_BUCKET_NAME}/metadata/{PIPELINE_NAME}/metadata.db'

print(f'PIPELINE_ROOT: {PIPELINE_ROOT}')

In [None]:
# Upload every Python file from the sibling ../trainer directory to
# MODULE_ROOT ({MODULE_ROOT} is expanded by IPython before the shell runs).
!gsutil cp ../trainer/*.py {MODULE_ROOT}/

In [None]:
# Upload the census CSV from ../data/census to DATA_ROOT so the pipeline can
# read its input from GCS.
!gsutil cp  ../data/census/data.csv {DATA_ROOT}/

In [None]:
# Sanity check: stream the uploaded CSV back from GCS and show only its first
# lines (the default output of `head`).
!gsutil cat {DATA_ROOT}/data.csv | head

## Pipelines

In [8]:
# Module file names. They are joined onto MODULE_ROOT when the pipeline is
# built, so they are expected to match files uploaded from ../trainer.
# NOTE(review): confirm that both files actually exist in ../trainer.
_trainer_module_file = 'trainer_file.py'
_transform_module_file = 'transform_file.py'

In [None]:
# Pipeline factory used below to build the pipeline object from the paths
# defined earlier.
# NOTE(review): the `trainer` package must be importable from the kernel's
# working directory; other cells refer to it as ../trainer, so the parent
# directory presumably has to be on sys.path -- confirm.
from trainer.training_pipeline import _create_pipeline

## Run the pipeline on Vertex Pipelines

In [None]:
import os

# File the runner is told to write the pipeline definition to; the same file
# is later used as the job template.
PIPELINE_DEFINITION_FILE = f'{PIPELINE_NAME}_pipeline.json'

# Build the runner first, with its default configuration.
runner_config = tfx.orchestration.experimental.KubeflowV2DagRunnerConfig()
runner = tfx.orchestration.experimental.KubeflowV2DagRunner(
    config=runner_config,
    output_filename=PIPELINE_DEFINITION_FILE,
)

# Full locations of the user modules inside MODULE_ROOT.
transform_module_uri = os.path.join(MODULE_ROOT, _transform_module_file)
trainer_module_uri = os.path.join(MODULE_ROOT, _trainer_module_file)

# Assemble the pipeline from the locations configured earlier in the notebook.
vertex_pipeline = _create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=DATA_ROOT,
    transform_module_file=transform_module_uri,
    trainer_module_file=trainer_module_uri,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=_METADATA_PATH,
)

# The return value is not needed; assigning to `_` keeps the cell output clean.
_ = runner.run(vertex_pipeline)

In [None]:
from google.cloud import aiplatform
from google.cloud.aiplatform import pipeline_jobs

# Initialise the Vertex AI client with the project and region configured at
# the top of the notebook.
aiplatform.init(
    project=GOOGLE_CLOUD_PROJECT,
    location=GOOGLE_CLOUD_REGION,
)

# Create a job from the pipeline definition file written by the runner cell.
job = pipeline_jobs.PipelineJob(
    template_path=PIPELINE_DEFINITION_FILE,
    display_name=PIPELINE_NAME,
)

# sync=False: start the run without blocking the notebook on its completion.
job.run(sync=False)