# Install Requirements

In [None]:
# Change into the project folder (presumably on a mounted Google Drive) so that
# pip can find requirements.txt.
%cd drive/My\ Drive/Building\ ML\ Pipelines/
!pip install -r requirements.txt
# Step back up two directory levels.
# NOTE(review): the path entered above is three levels deep, so two `%cd ..`
# calls do not return to the starting directory - confirm this is intended.
%cd ..
%cd ..

# Chapter 10: Advanced TensorFlow Extended

This chapter focuses on building your own TFX components or more complex pipeline graphs. Furthermore, advanced concepts of pipeline structures will be introduced.

## Advanced Pipeline Concepts

Three additional concepts will be discussed:
 - Training multiple models simultaneously
 - Exporting models for mobile deployments
 - Warm starting model training

### Training Multiple Models Simultaneously

You can assemble a graph with multiple models with TFX by defining multiple Trainer components.

In [None]:
# Function to instantiate the Trainer efficiently
def set_trainer(module_file, instance_name, train_steps=5000, eval_steps=100):
    """Build a Trainer component for one branch of the pipeline graph.

    Args:
        module_file: Path of the module file this Trainer should load.
        instance_name: Value passed on as the Trainer's ``instance_name``.
        train_steps: Number of steps passed to ``TrainArgs``.
        eval_steps: Number of steps passed to ``EvalArgs``.

    Returns:
        The configured ``Trainer`` component.
    """
    return Trainer(
        # Use the caller-supplied module file; otherwise every branch would
        # train the same model.
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        schema=schema_gen.outputs['schema'],
        transform_graph=transform.outputs['transform_graph'],
        # Honour the per-branch step counts instead of global constants.
        train_args=trainer_pb2.TrainArgs(num_steps=train_steps),
        eval_args=trainer_pb2.EvalArgs(num_steps=eval_steps),
        instance_name=instance_name,
    )

# Load the module file for each Trainer
prod_module_file = os.path.join(pipeline_dir, "prod_module.py")
trial_module_file = os.path.join(pipeline_dir, "trial_module.py")

# Instantiate a Trainer component for each graph branch.
# The production branch must receive the production module file defined above.
trainer_prod_model = set_trainer(prod_module_file, "production_model")
trainer_trial_model = set_trainer(
    trial_module_file, "trial_model", train_steps=10000, eval_steps=500
)

Each instantiated training component needs to be consumed by its own Evaluator.

In [None]:
# One Evaluator per Trainer branch: both read the same examples, but each one
# analyses the model produced by its own Trainer with its own eval config.
evaluator_prod_model = Evaluator(
    instance_name="production_model",
    model=trainer_prod_model.outputs["model"],
    eval_config=eval_config_prod_model,
    examples=example_gen.outputs["examples"],
)

evaluator_trial_model = Evaluator(
    instance_name="trial_model",
    model=trainer_trial_model.outputs["model"],
    eval_config=eval_config_trial_model,
    examples=example_gen.outputs["examples"],
)

### Exporting TFLite Models

Very few changes are required for mobile deployment compared to deployment to model servers, as discussed in Chapter 8.

**TFLite Limitations**
Because of hardware limitations of mobile and edge devices, TFLite doesn't support all TensorFlow operations. Therefore not every model can be converted to a TFLite-compatible model. For more information see the <a href="https://www.tensorflow.org/lite">TFLite Website</a>

We can use the branch strategy of the above section and amend the run_fn function of the module file to rewrite the saved models to a TFLite-compatible format.

In [None]:
from tfx.components.trainer.executor import TrainerFnArgs
from tfx.components.trainer.rewriting import converters, rewriter, rewriter_factory

In [None]:
def run_fn(fn_args: TrainerFnArgs):
    """Train the model and export it in a TFLite-compatible format.

    The function is named ``run_fn`` because that is the entry point the
    Trainer looks up in the module file. The training code itself is elided
    (``...``); it is expected to define ``model`` and ``signatures``.

    Args:
        fn_args: Trainer arguments; only ``serving_model_dir`` is read here.
    """
    ...
    # Export the SavedModel into a temporary sub-folder first, so that the
    # final serving directory only contains the rewritten TFLite model.
    temp_saving_model_dir = os.path.join(fn_args.serving_model_dir, "temp")
    # Export the model as saved model
    model.save(temp_saving_model_dir, save_format="tf", signatures=signatures)

    # Instantiate the TFLite rewriter
    tfrw = rewriter_factory.create_rewriter(
        rewriter_factory.TFLITE_REWRITER,
        name="tflite_rewriter",
        enable_experimental_new_converter=True
    )

    # Convert the model to TFLite format
    converters.rewrite_saved_model(
        temp_saving_model_dir,
        fn_args.serving_model_dir,
        tfrw,
        rewriter.ModelType.TFLITE_MODEL
    )

    # Delete the saved model after conversion
    tf.io.gfile.rmtree(temp_saving_model_dir)


# Backward-compatible alias for the previous (misspelled) function name.
fun_fn = run_fn

Evaluate the TFLite-compliant model, which is helpful in detecting whether the model optimizations (e.g. quantization) have led to a degradation of the model's performance.

In [None]:
import tensorflow_model_analysis as tfma
from tfx.components import Evaluator

In [None]:
# Evaluation config that tells TFMA to load the model as a TFLite model.
eval_config = tfma.EvalConfig(
    # The EvalConfig field is called `model_specs`.
    model_specs=[tfma.ModelSpec(label_key="my_label", model_type=tfma.TF_LITE)],
    # ... add further EvalConfig fields (e.g. metrics or slicing specs) here.
)

# Evaluator for the mobile branch; it consumes the model of the mobile Trainer.
evaluator = Evaluator(
    examples=example_gen.outputs["examples"],
    model=trainer_mobile_model.outputs["model"],
    eval_config=eval_config,
    instance_name="tflite_model"
)

With this presented pipeline setup, we can now produce models for mobile deployment automatically and push them to the artifact stores for model deployment in mobile apps.<br>
For example, a Pusher component could ship the produced TFLite model to a cloud bucket where a mobile developer could pick up the model and deploy it with <a href="https://developers.google.com/ml-kit">Google's ML Kit</a> in an iOS or Android mobile app.

**Converting Models to TensorFlow.js**
This new feature allows you to deploy models to web browsers and Node.js runtime environments. For details see page 287.

### Warm Starting Model Training

In a TFX pipeline, warm start training requires the Resolver component that we introduced in Chapter 7. The Resolver picks up the details of the latest trained model and passes them on to the Trainer component.

In [2]:
# Resolver node exposing a "latest_model" output channel of type Model,
# resolved with the LatestArtifactsResolver class.
latest_model_resolver = ResolverNode(
    resolver_class=latest_artifacts_resolver.LatestArtifactsResolver,
    instance_name="latest_model_resolver",
    latest_model=Channel(type=Model),
)

The latest model is then passed to the Trainer using the base_model argument.

In [None]:
# Trainer that warm-starts from the latest model found by the resolver.
trainer = Trainer(
    module_file=trainer_file,
    custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
    # Pass the transformed examples once, via `examples` only (the same
    # keyword that set_trainer uses).
    examples=transform.outputs['transformed_examples'],
    schema=schema_gen.outputs['schema'],
    # The previously trained model used as the starting point for training.
    base_model=latest_model_resolver.outputs["latest_model"],
    transform_graph=transform.outputs['transform_graph'],
    train_args=trainer_pb2.TrainArgs(num_steps=TRAINING_STEPS),
    eval_args=trainer_pb2.EvalArgs(num_steps=EVALUATION_STEPS),
)

## Human in the Loop

The idea is to let a human review the trained model after the automatic model analysis, to spot check the trained model or to gain confidence in the automated pipeline setup. This would mean that the human generates the *blessing* which allows the Pusher component to push the model. This whole setup works via Slack; for details see pages 289ff.

### Slack Component Setup

See pages 291f.

### How to use The Slack Component

In [None]:
from slack_component.component import SlackComponent

In [None]:
# Slack component that asks a human reviewer to approve or reject the model.
slack_validator = SlackComponent(
    model=trainer.outputs["model"],
    model_blessing=model_validator.outputs["blessing"],
    # Load the Slack token from your environment
    slack_token=os.environ["SLACK_BOT_TOKEN"],
    # Specify the channel where the message should appear
    slack_channel_id="my-channel-id",
    timeout_sec=3600  # in seconds, i.e. one hour
)

When executed, the component will post a message and wait up to an hour (defined in timeout_sec) for an answer. During this time, a data scientist can evaluate the model and respond with their approval or rejection. The downstream component (e.g. a Pusher component) can consume the result from the Slack component, as shown in the following code:

In [None]:
# Pusher that only pushes models carrying the blessing issued by the Slack
# component; the model is written below `serving_model_dir` on the filesystem.
pusher = Pusher(
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(base_directory=serving_model_dir),
    ),
    # Model blessing provided by the Slack component
    model_blessing=slack_validator.outputs["slack_blessing"],
    model=trainer.outputs["model"],
)

**Slack API Standards**
The implementation of the Slack component relies on the Real Time Messaging (RTM) protocol. This protocol is deprecated and might be replaced by a new protocol standard, which would affect the component's functionality.

## Custom TFX Components

Remember from Chapter 2 that each component consists of three parts:
 - driver
 - executor
 - publisher

### Use Cases of Custom Components

Custom components can be applied anywhere along the machine learning pipeline. The concepts discussed in the following provide the highest flexibility to customize your machine learning pipelines to your needs. Some ideas might be:
 - Ingesting data from your custom database
 - Sending an email with the generated data statistics to the data science team
 - Notifying the DevOps team if a new model was exported
 - Kicking off a post-export build process for Docker containers
 - Tracking additional information in your machine learning audit trail

### Writing a Custom Component from Scratch

We will need to implement a few component pieces:
 - 1. Define the inputs and outputs of our component as a ComponentSpec
 - 2. Define our component executor, which defines how the input data should be processed and how the output data is generated
  - 2.1 If the component requires inputs that are not registered in the metadata store, we will need to write a custom component driver, e.g. this happens when we want to register an image path in the component and the artifact has not been registered in the metadata store previously.

**Try to Reuse Components**
Try to reuse already existing TFX components and change only the executor instead; this is explained further below.

#### Component Specifications

The component specifications or ComponentSpec, define how components communicate with each other. They describe the component inputs, outputs and potential component parameters that are required during the component execution.
The following example shows a definition of our component specifications for our image ingestion component:

In [None]:
from tfx.types.component_spec import ChannelParameter, ExecutionParameter
from tfx.types import standard_artifacts

In [4]:
class ImageIngestComponentSpec(types.ComponentSpec):
    """Declares the parameters, inputs and outputs of the custom TFX image ingestion component."""

    # Input channel: an ExternalArtifact, which allows new input paths.
    INPUTS = {"input": ChannelParameter(type=standard_artifacts.ExternalArtifact)}

    # Output channel: the exported Examples.
    OUTPUTS = {"examples": ChannelParameter(type=standard_artifacts.Examples)}

    # Execution parameter: the component name, given as text.
    PARAMETERS = {"name": ExecutionParameter(type=Text)}

#### Component Channels

The standard types are usually the ones used above: *ExternalArtifact* and *Examples*. Here is a small list of available types:
 - ExampleStatistics
 - Model
 - ModelBlessing
 - Bytes
 - String
 - Integer
 - Float
 - Hyperparameter

#### Component Executors

The executor defines the processes inside the component, including how inputs are used to generate the component outputs. Here we will write it from scratch, but we can rely on TFX classes to inherit function patterns.

In [None]:
from tfx.components.base import base_executor

**Artifacts contain References**
The information provided via the input_dict and output_dict contains the information stored in the metadata store. These are references to the artifacts, not the underlying data itself; e.g. the input_dict will contain a protocol buffer with the file location information instead of the data. This allows us to process the data efficiently with programs like Apache Beam.

In [None]:
class Executor(base_executor.BaseExecutor):
    """
    Executor for Image Ingestion Component.
    """
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Process the input artifacts and generate the output artifacts.

        The third parameter is named ``exec_properties``, the name the other
        executor code in this notebook uses for the execution properties.

        Args:
            input_dict: Input artifacts, keyed by input name.
            output_dict: Output artifacts, keyed by output name.
            exec_properties: Execution properties of the component.
        """
        ...

In [None]:
# Reuse the implementation that we discussed in Image Data for Computer Vision Problems
# to convert images to TFRecord data structures.
def convert_image_to_TFExample(image_filename, tf_writer, input_base_uri):
    """Convert one image into a tf.Example and write it with ``tf_writer``.

    Args:
        image_filename: File name of the image, relative to ``input_base_uri``.
        tf_writer: Writer object whose ``write`` method receives the
            serialized example.
        input_base_uri: Folder that contains the image.

    Raises:
        NotImplementedError: If the image path contains neither "dog" nor "cat".
    """
    # Assemble the complete image path
    image_path = os.path.join(input_base_uri, image_filename)

    # Determine the label for each image based on the file path
    lowered_filename = image_path.lower()
    if "dog" in lowered_filename:
        label = 0
    elif "cat" in lowered_filename:
        label = 1
    else:
        raise NotImplementedError("Found unknown image")

    # Read the image
    raw_file = tf.io.read_file(image_path)

    # Create the TensorFlow Example data structures.
    # tf.train.Features takes its mapping through the `feature` field.
    # NOTE(review): `_bytes_features` / `_int64_feature` are helpers defined
    # outside this cell - confirm the exact helper names.
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                "image_raw": _bytes_features(raw_file.numpy()),
                "label": _int64_feature(label)
            }
        )
    )

    # Write the tf.Example to TFRecord files using the writer passed in
    tf_writer.write(example.SerializeToString())

We now want our very basic component to load our images, convert them to tf.Examples and return two image sets for training and evaluation.

In [None]:
class ImageIngestionExecutor(base_executor.BaseExecutor):
    """Executor that converts a folder of images into one TFRecord file per split."""

    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Load the images, convert them to tf.Examples and write every split.

        Args:
            input_dict: Input artifacts; ``input_dict["input"]`` holds the
                artifact that points at the image folder.
            output_dict: Output artifacts; ``output_dict["examples"]`` receives
                the generated splits.
            exec_properties: Execution properties of the component.
        """
        # Log arguments
        self._log_startup(input_dict, output_dict, exec_properties)

        # Get the folder path from the artifact
        input_base_uri = artifact_utils.get_single_uri(input_dict["input"])

        # Obtain all the filenames
        image_files = tf.io.gfile.listdir(input_base_uri)
        random.shuffle(image_files)
        # NOTE(review): get_splits is defined outside this cell; it is assumed
        # to yield (split_name, images) pairs - confirm.
        splits = get_splits(image_files)

        # The writer options are the same for every split, so build them once.
        options = tf.io.TFRecordOptions(compression_type=None)

        for split_name, images in splits:
            # Set the split Uniform Resource Identifier (URI)
            output_dir = artifact_utils.get_split_uri(
                output_dict["examples"],
                split_name
            )
            tfrecord_filename = os.path.join(output_dir, "images.tfrecord")

            # Create a TFRecord writer instance with options; the context
            # manager closes the file once the split is written.
            with tf.io.TFRecordWriter(tfrecord_filename, options=options) as writer:
                for image in images:
                    # Write an image to a file containing the TFRecord data structures
                    convert_image_to_TFExample(image, writer, input_base_uri)

For more details and information about dynamically setting data splits, see page 303.

#### Component Drivers

In our case we want to ingest data from a disk and we are reading the data for the first time in our pipeline; therefore the data isn't passed down from a different component and we need to register the data sources in the metadata store. If this were not done, the component with the executor we have defined so far would encounter a TFX error.

**Custom Drivers Are Rare**
If you can reuse the input/output architecture of an existing TFX component or if the inputs are already registered with the metadata store, you will not need to write a custom driver and you can skip this step.

See the explicit code on pages 304f.

In [6]:
class ImageIngestDriver(base_driver.BaseDriver):
    """
    Custom driver for ImageIngest.
    """
    def resolve_input_artifacts(self, input_channels, exec_properties,
                                driver_args, pipeline_info):
        """Overrides BaseDriver.resolve_input_artifacts().

        The implementation is elided here; see the explicit code on pages 304f.
        NOTE(review): the parameter list is assumed to mirror
        BaseDriver.resolve_input_artifacts - confirm against the installed
        TFX version.
        """
        ...

#### Assembling the custom component

To define the actual component, we need to define the specification, executor and driver classes. We can do this by setting SPEC_CLASS, EXECUTOR_SPEC and DRIVER_CLASS. As the final step, we need to instantiate our ComponentSpec with the component's arguments (e.g. input and output examples and the provided name) and pass it to the instantiated ImageIngestComponent.

In [None]:
from tfx.components.base import base_component
from tfx import types
from tfx.types import channel_utils

In [None]:
class ImageIngestComponent(base_component.BaseComponent):
    """
    Custom image ingestion component.

    Ties together the component specification, the executor and the driver.
    """
    SPEC_CLASS = ImageIngestComponentSpec
    # Refers to the executor class defined earlier in this notebook.
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(ImageIngestionExecutor)
    DRIVER_CLASS = ImageIngestDriver

    def __init__(self, input, output_data=None, name=None):
        """Create the component.

        Args:
            input: Channel passed to the spec as its ``input``.
            output_data: Optional channel for the generated examples. When it
                is not given, a new Examples channel with the splits "train"
                and "eval" is created.
            name: Optional name passed to the spec.
        """
        if not output_data:
            examples_artifact = standard_artifacts.Examples()
            examples_artifact.split_names = artifact_utils.encode_split_names(["train", "eval"])
            output_data = channel_utils.as_channel([examples_artifact])

        spec = ImageIngestComponentSpec(
            input=input,
            examples=output_data,
            name=name
        )
        super().__init__(spec=spec)

#### Using our basic custom component

In [None]:
import os

from tfx.utils.dsl_utils import external_input
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from image_ingestion_component.component import ImageIngestComponent

In [None]:
context = InteractiveContext()

image_file_path = "/path/to/files"
# Wrap the image folder defined above as an external input channel ...
examples = external_input(image_file_path)
# ... and hand that channel (not the string "examples") to the component.
example_gen = ImageIngestComponent(input=examples, name=u"ImageIngestComponent")

context.run(example_gen)

In [None]:
# The above component's output can then be consumed by downstream components like StatisticsGen
from tfx.components import StatisticsGen

In [None]:
# Feed the examples produced by the custom component into StatisticsGen.
statistics_gen = StatisticsGen(
    examples=example_gen.outputs["examples"]
)

context.run(statistics_gen)
context.show(statistics_gen.outputs["statistics"])

**Very Basic Implementation**
Caution: The discussed implementation only provides basic functionality and is not production-ready. For details see the following section. For a production-ready implementation, see the updated component implementation in the next sections.

#### Implementation Review

While the component is functioning, it is missing some key functionality that we discussed in Chapter 3 (e.g. dynamic split names or split ratios). Further, it required a lot of boilerplate code, e.g. setting up the component driver. The ingestion should be handled in a more efficient way. This can be achieved by using Apache Beam under the hood of TFX components.

### Reusing Existing Components

Instead of writing everything from scratch, we can inherit an existing component and customize it by overwriting the executor functionality.
Similarly to the Avro or Parquet examples from Chapter 3, we can simply focus on developing our custom executor and making it more flexible than our previous basic component. By reusing existing code infrastructure, we can also piggyback on existing Apache Beam implementations.<br>
TFX and Apache Beam provide classes and function decorators to ingest the data via Apache Beam pipelines. We will use the function decorator *@beam.ptransform_fn*, which allows us to define an Apache Beam transformation (PTransform). The following code example is an updated version of the previous conversion function:

In [7]:
# Only the file path is needed
def convert_image_to_TFExample(image_path):
    """Convert the image at ``image_path`` into a tf.Example and return it.

    Args:
        image_path: Complete path of the image file.

    Returns:
        The tf.train.Example holding the raw image bytes and the label.

    Raises:
        NotImplementedError: If the path contains neither "dog" nor "cat".
    """
    # Determine the label for each image based on the file path
    lowered_filename = image_path.lower()
    if "dog" in lowered_filename:
        label = 0
    elif "cat" in lowered_filename:
        label = 1
    else:
        raise NotImplementedError("Found unknown image")

    # Read the image
    raw_file = tf.io.read_file(image_path)

    # Create the TensorFlow Example data structures.
    # tf.train.Features takes its mapping through the `feature` field.
    # NOTE(review): `_bytes_features` / `_int64_feature` are helpers defined
    # outside this cell - confirm the exact helper names.
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                "image_raw": _bytes_features(raw_file.numpy()),
                "label": _int64_feature(label)
            }
        )
    )

    # The function returns examples instead of writing them to a disk.
    return example

With the updated conversion function in place, we can now focus on implementing the core executor functionality.

In [None]:
@beam.ptransform_fn
def image_to_example(
    pipeline: beam.Pipeline,
    input_dict: Dict[Text, List[types.Artifact]],
    exec_properties: Dict[Text, Any],
    split_pattern: Text) -> beam.pvalue.PCollection:
    """Turn every image matching ``split_pattern`` into a tf.Example.

    Args:
        pipeline: Beam pipeline the transformation is attached to.
        input_dict: Input artifacts; ``input_dict["input"]`` points at the
            base folder of the images.
        exec_properties: Execution properties (not read by this function).
        split_pattern: Glob pattern, relative to the base folder.

    Returns:
        The PCollection produced by mapping the conversion over all files.

    Raises:
        RuntimeError: If the pattern does not match any file.
    """
    base_uri = artifact_utils.get_single_uri(input_dict["input"])
    image_pattern = os.path.join(base_uri, split_pattern)

    log_message = "Processing input image data {} to tf.Example.".format(image_pattern)
    absl.logging.info(log_message)

    # Resolve the pattern into the concrete list of files to ingest.
    matched_files = tf.io.gfile.glob(image_pattern)
    if not matched_files:
        error_message = "Split pattern {} did not match any valid path.".format(image_pattern)
        raise RuntimeError(error_message)

    # Lift the file list into a PCollection and convert each image.
    file_collection = pipeline | beam.Create(matched_files)
    return file_collection | "ConvertImageToTFRecords" >> beam.Map(
        lambda path: convert_image_to_TFExample(path)
    )

Now as the final step, overwrite the GetInputSourceToExamplePTransform of the BaseExampleGenExecutor with our image_to_example.

In [None]:
class ImageExampleGenExecutor(BaseExampleGenExecutor):
    """Example-gen executor that ingests images through ``image_to_example``."""

    def GetInputSourceToExamplePTransform(self) -> beam.PTransform:
        """Return the PTransform that converts the input source to examples."""
        # `image_to_example` is the module-level function decorated with
        # @beam.ptransform_fn that is defined in the cell above.
        return image_to_example

#### Using our custom executor

We can now follow the same patterns we discussed for the Avro ingestion in Chapter 3 and specify the custom_executor_spec. By doing this, we can use the entire functionality of ingestion components.<br>
Complete example of using our custom component:

In [None]:
from tfx.components import FileBasedExampleGen
from tfx.components.base import executor_spec
from tfx.proto import example_gen_pb2
from tfx.utils.dsl_utils import external_input

from image_ingestion_component.executor import ImageExampleGenExecutor

# Input configuration: one input split named "images", selected by a glob pattern.
input_config = example_gen_pb2.Input(
    splits=[
        example_gen_pb2.Input.Split(
            name="images",
            pattern="sub-directory/if/needed/*.jpg"
        )
    ]
)

# Output configuration: "train" and "eval" splits with 4 and 1 hash buckets.
output = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(
        splits=[
            example_gen_pb2.SplitConfig.Split(
                name="train",
                hash_buckets=4
            ),
            example_gen_pb2.SplitConfig.Split(
                name="eval",
                hash_buckets=1
            )
        ]
    )
)

# Reuse the file-based example gen component and only swap in our executor.
example_gen = FileBasedExampleGen(
    input=external_input("/path/to/images/"),
    input_config=input_config,
    output_config=output,
    custom_executor_spec=executor_spec.ExecutorClassSpec(ImageExampleGenExecutor)
)

# References and Additional Resources

- <a href="https://www.tensorflow.org/lite">TFLite Website</a>
- <a href="https://developers.google.com/ml-kit">Google's ML Kit</a>
- <a href="https://api.slack.com/">Slack API</a>
- <a href="https://www.tensorflow.org/tfx/guide">TensorFlow Extended Guide</a>