In [16]:
import os
import tensorflow as tf
from tfx import v1 as tfx

In [17]:
# Report the library versions this notebook was executed against.
_version_report = f'''
tensorflow version = {tf.__version__}
tfx version        = {tfx.__version__}
'''
print(_version_report)


tensorflow version = 2.9.1
tfx version        = 1.9.1



## Set up variables

In [18]:
# Name of the pipeline; it is also the leaf folder of every path below.
PIPELINE_NAME = 'penguin-simple'

# Output directory that stores the artifacts generated by the pipeline.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)

# SQLite DB file used as the ML Metadata (MLMD) storage.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')

# Output directory to which models created by the pipeline are exported.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

In [19]:
# Echo the configured locations so they can be checked before the run.
_settings_report = f'''
PIPELINE_NAME={PIPELINE_NAME}
PIPELINE_ROOT={PIPELINE_ROOT}
METADATA_PATH={METADATA_PATH}
SERVING_MODEL_DIR={SERVING_MODEL_DIR}
'''
print(_settings_report)


PIPELINE_NAME=penguin-simple
PIPELINE_ROOT=pipelines\penguin-simple
METADATA_PATH=metadata\penguin-simple\metadata.db
SERVING_MODEL_DIR=serving_model\penguin-simple



In [20]:
# Set the default logging level.
# `logging` here is absl's logging module (not the standard-library one);
# INFO verbosity is what makes the `INFO:absl:` lines appear in the output
# of the pipeline run further down.
from absl import logging
logging.set_verbosity(logging.INFO)

## Prepare example data

In [21]:
import urllib.request
import tempfile

In [22]:
# Location of the penguin CSV used as example data.
_data_url = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/penguin/data/labelled/penguins_processed.csv'

# Create a temporary directory; the pipeline is later pointed at it as its
# data root, so it must contain only the downloaded CSV.
DATA_ROOT = tempfile.mkdtemp(prefix='tfx-data')
_data_filepath = os.path.join(DATA_ROOT, 'data.csv')

# Last expression of the cell: downloads the file and displays the
# (local path, HTTP headers) tuple returned by urlretrieve.
urllib.request.urlretrieve(_data_url, _data_filepath)

('C:\\Users\\Srush\\AppData\\Local\\Temp\\tfx-dataaj0w7pr4\\data.csv',
 <http.client.HTTPMessage at 0x1c4a7a37400>)

In [23]:
# File name of the user-provided training module; it is handed to the
# Trainer component as `module_file` when the pipeline is created.
# The relative name is resolved against the notebook's working directory
# (see the "Generating ephemeral wheel package" log line of the run).
_trainer_module_file='penguin_trainer.py'

## Write a pipeline definition

In [24]:
def _create_pipeline(
    pipeline_name: str,
    pipeline_root: str,
    data_root: str,
    module_file: str,
    serving_model_dir: str,
    metadata_path: str,
) -> tfx.dsl.Pipeline:
    '''Creates a three component penguin pipeline with TFX.

    The components are chained as CsvExampleGen -> Trainer -> Pusher.

    Args:
        pipeline_name: Name given to the resulting pipeline.
        pipeline_root: Passed to the pipeline as its `pipeline_root`.
        data_root: Directory passed to CsvExampleGen as `input_base`.
        module_file: File with the user-provided training code for Trainer.
        serving_model_dir: Base directory of the Pusher's filesystem
            push destination.
        metadata_path: Path handed to the SQLite metadata connection config.

    Returns:
        A `tfx.dsl.Pipeline` holding the three components.
    '''
    # Ingest: brings the data found under `data_root` into the pipeline.
    csv_ingest = tfx.components.CsvExampleGen(input_base=data_root)

    # Train: runs the user-provided code from `module_file` on the examples,
    # with fixed step counts for training and evaluation.
    train_budget = tfx.proto.TrainArgs(num_steps=100)
    eval_budget = tfx.proto.EvalArgs(num_steps=5)
    model_trainer = tfx.components.Trainer(
        module_file=module_file,
        examples=csv_ingest.outputs['examples'],
        train_args=train_budget,
        eval_args=eval_budget,
    )

    # Push: copies the trained model to a filesystem destination.
    export_target = tfx.proto.PushDestination(
        filesystem=tfx.proto.PushDestination.Filesystem(
            base_directory=serving_model_dir,
        ),
    )
    model_pusher = tfx.components.Pusher(
        model=model_trainer.outputs['model'],
        push_destination=export_target,
    )

    # Metadata is kept in a SQLite file at `metadata_path`.
    mlmd_config = tfx.orchestration.metadata.sqlite_metadata_connection_config(
        metadata_path
    )

    # The pipeline consists of exactly these three components.
    return tfx.dsl.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=mlmd_config,
        components=[csv_ingest, model_trainer, model_pusher],
    )

## Run the pipeline

In [25]:
# Execute the penguin pipeline with TFX's LocalDagRunner.
_local_runner = tfx.orchestration.LocalDagRunner()
_penguin_pipeline = _create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=DATA_ROOT,
    module_file=_trainer_module_file,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=METADATA_PATH,
)
# NOTE(review): the recorded output of this cell ends in an
# InvalidArgumentError while creating a directory whose name is a timestamp
# containing ':' under a Windows-style path. Presumably the colon is not a
# legal character in Windows directory names -- confirm, and consider running
# the notebook on Linux/WSL/Colab if so.
_local_runner.run(_penguin_pipeline)

INFO:absl:Generating ephemeral wheel package for 'c:\\Users\\Srush\\Documents\\GitHub\\TensorFlow-Extended\\penguin_trainer.py' (including modules: ['penguin_trainer']).
INFO:absl:User module package has hash fingerprint version 31faafa906472d00344224fb077aab0b0502c36c6ce9301e65a92720ce2934ad.
INFO:absl:Executing: ['c:\\ProgramData\\Anaconda3\\python.exe', 'C:\\Users\\Srush\\AppData\\Local\\Temp\\tmpvrur1xvx\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\Srush\\AppData\\Local\\Temp\\tmpycniyu0h', '--dist-dir', 'C:\\Users\\Srush\\AppData\\Local\\Temp\\tmpoxdjlkeg']
INFO:absl:Successfully built user code wheel distribution at 'pipelines\\penguin-simple\\_wheels\\tfx_user_code_Trainer-0.0+31faafa906472d00344224fb077aab0b0502c36c6ce9301e65a92720ce2934ad-py3-none-any.whl'; target user module is 'penguin_trainer'.
INFO:absl:Full user module path is 'penguin_trainer@pipelines\\penguin-simple\\_wheels\\tfx_user_code_Trainer-0.0+31faafa906472d00344224fb077aab0b0502c36c6ce9

InvalidArgumentError: Failed to create a directory: pipelines\penguin-simple\CsvExampleGen\.system\stateful_working_dir/2022-08-27T18:32:54.408901; Invalid argument