In [13]:
!pip install --upgrade "tfx[kfp]<2"

Collecting tfx[kfp]<2
  Downloading tfx-1.12.0-py3-none-any.whl (2.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 12.7 MB/s eta 0:00:00
Collecting ml-pipelines-sdk==1.12.0 (from tfx[kfp]<2)
  Downloading ml_pipelines_sdk-1.12.0-py3-none-any.whl (1.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 94.0 MB/s eta 0:00:00
Collecting ml-metadata<1.13.0,>=1.12.0 (from tfx[kfp]<2)
  Downloading ml_metadata-1.12.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.7/6.7 MB 71.3 MB/s eta 0:00:00
Collecting packaging<21,>=20 (from tfx[kfp]<2)
  Downloading packaging-20.9-py2.py3-none-any.whl (40 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.9/40.9 kB 8.3 MB/s eta 0:00:00
Collecting portpi

In [19]:
import tensorflow as tf
# Bind `tfx` to the public v1 API namespace: the rest of the notebook uses
# names such as `tfx.dsl.Pipeline`, `tfx.proto.TrainArgs` and
# `tfx.components.FnArgs`, which are exposed by `tfx.v1`, not by the
# top-level `tfx` package.
from tfx import v1 as tfx
import kfp

# Report the installed versions so a run can be reproduced later.
print('TensorFlow version: {}'.format(tf.__version__))
print('TFX version: {}'.format(tfx.__version__))
print('KFP version: {}'.format(kfp.__version__))

TensorFlow version: 2.8.0
TFX version: 1.12.0
KFP version: 1.8.14


In [20]:
# Google Cloud settings shared by the rest of the notebook.
GOOGLE_CLOUD_PROJECT = 'edival-402305'
GOOGLE_CLOUD_REGION = 'us-central1'
# The bucket name is derived from the project id.
GCS_BUCKET_NAME = f'{GOOGLE_CLOUD_PROJECT}-bucket'

# Log an error (without aborting) when any of the settings is left empty.
if not all((GOOGLE_CLOUD_PROJECT, GOOGLE_CLOUD_REGION, GCS_BUCKET_NAME)):
    from absl import logging
    logging.error('Please set all required parameters.')

In [21]:
# Make GOOGLE_CLOUD_PROJECT the active project of the gcloud CLI.
!gcloud config set project {GOOGLE_CLOUD_PROJECT}

Updated property [core/project].


In [22]:
# Name under which all artifacts of this pipeline are grouped in the bucket.
PIPELINE_NAME = 'edival-vertex-pipelines'

# Root location for the artifacts produced by pipeline runs.
PIPELINE_ROOT = f'gs://{GCS_BUCKET_NAME}/pipeline_root/{PIPELINE_NAME}'

# Location of the user-provided Python module(s).
MODULE_ROOT = f'gs://{GCS_BUCKET_NAME}/pipeline_module/{PIPELINE_NAME}'

# Location of the input data (currently disabled).
# DATA_ROOT = 'gs://{}/data/{}'.format(GCS_BUCKET_NAME, PIPELINE_NAME)

# Destination the trained model is pushed to for serving.
SERVING_MODEL_DIR = f'gs://{GCS_BUCKET_NAME}/serving_model/{PIPELINE_NAME}'

# Echo the resolved locations for a quick sanity check.
print(f'PIPELINE_ROOT: {PIPELINE_ROOT}')
print(f'MODULE_ROOT: {MODULE_ROOT}')
# print('DATA_ROOT: {}'.format(DATA_ROOT))
print(f'SERVING_MODEL_DIR: {SERVING_MODEL_DIR}')

PIPELINE_ROOT: gs://edival-402305-bucket/pipeline_root/edival-vertex-pipelines
MODULE_ROOT: gs://edival-402305-bucket/pipeline_module/edival-vertex-pipelines
SERVING_MODEL_DIR: gs://edival-402305-bucket/serving_model/edival-vertex-pipelines


In [23]:
# Local filename of the Trainer module: the %%writefile cell creates it and
# the gsutil cell copies it to MODULE_ROOT.
_trainer_module_file = 'edival_trainer.py'

In [24]:
%%writefile {_trainer_module_file}

# Set the path to the custom config file and the directory to store training checkpoints in
pipeline_file = '/home/jupyter/models/mymodel/pipeline_file.config'
model_dir = '/home/jupyter/training/'
num_steps = 100

# TFX Trainer will call this function.
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """

  !python /home/jupyter/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={pipeline_file} \
    --model_dir={model_dir} \
    --alsologtostderr \
    --num_train_steps={num_steps} \
    --sample_1_of_n_eval_examples=1

Writing edival_trainer.py


In [25]:
# Copy the Trainer module file into the MODULE_ROOT location on GCS.
!gsutil cp {_trainer_module_file} {MODULE_ROOT}/

Copying file://edival_trainer.py [Content-Type=text/x-python]...
- [1 files][  674.0 B/  674.0 B]                                                
Operation completed over 1 objects/674.0 B.                                      


In [None]:
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     module_file: str, serving_model_dir: str,
                     ) -> tfx.dsl.Pipeline:
  """Builds a TFX pipeline made of ExampleGen, Trainer and Pusher.

  Args:
    pipeline_name: Name given to the resulting pipeline.
    pipeline_root: Root location passed to the pipeline as `pipeline_root`.
    data_root: Location of the input data read by the ExampleGen component.
    module_file: Path of the Python module handed to the Trainer.
    serving_model_dir: Filesystem destination the Pusher writes the model to.

  Returns:
    The assembled `tfx.dsl.Pipeline`.
  """

  # Brings data into the pipeline; the Trainer's `examples` input below is
  # wired to this component's output.
  # NOTE(review): assumes `data_root` holds TFRecord files of tf.Example
  # protos; switch to `tfx.components.CsvExampleGen` if the data is CSV.
  example_gen = tfx.components.ImportExampleGen(input_base=data_root)

  # Uses user-provided Python function that trains a model.
  trainer = tfx.components.Trainer(
      module_file=module_file,
      examples=example_gen.outputs['examples'],
      train_args=tfx.proto.TrainArgs(num_steps=100),
      eval_args=tfx.proto.EvalArgs(num_steps=5))

  # Pushes the model to a filesystem destination.
  pusher = tfx.components.Pusher(
      model=trainer.outputs['model'],
      push_destination=tfx.proto.PushDestination(
          filesystem=tfx.proto.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  components = [
      example_gen,
      trainer,
      pusher,
  ]

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components)