# Trainer Test Run

## Set up

TFX requires Apache Airflow and the Docker SDK for Python.


In [1]:
!pip install 'apache-airflow[gcp]' docker tfx





You should consider upgrading via the 'pip install --upgrade pip' command.


In this notebook, we use TFX version 0.13.0.

In [2]:
import tfx
# Show the installed TFX version; the bare last expression is rendered as the cell output.
tfx.version.__version__

'0.13.0'

TFX requires TensorFlow >= 1.13.1

In [3]:
import tensorflow as tf
# Show the installed TensorFlow version; the bare last expression is rendered as the cell output.
tf.__version__

'1.13.1'

TFX supports Python 3.5 as of version 0.13.0.

In [4]:
import sys
# Show the version string of the Python interpreter running this kernel.
sys.version

'3.5.2 (default, Nov 12 2018, 13:43:14) \n[GCC 5.4.0 20160609]'

## Download sample data

In [5]:
%%bash
# Abort on the first failing command (or unset variable) so that a failed
# download stops the cell instead of being silently ignored.
set -euo pipefail

TAXI_ROOT=~/taxi
EXAMPLE_URL="https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline"

# Start from a clean data directory so that this notebook can be run twice.
# There must not be train/eval files left under ~/taxi/data, since TFX 0.13.0
# can handle only a single input file. `rm -rf` is a no-op when the directory
# does not exist, so no existence check is needed.
rm -rf "${TAXI_ROOT}/data"

# Download the taxi data.
mkdir -p "${TAXI_ROOT}/data/simple"
mkdir -p "${TAXI_ROOT}/serving_model/taxi_simple"
wget "${EXAMPLE_URL}/data/simple/data.csv" -O "${TAXI_ROOT}/data/simple/data.csv"

# Download taxi_utils.py, the Python module file that is later passed to the
# Transform and Trainer components as their user-defined logic.
wget "${EXAMPLE_URL}/taxi_utils.py" -O "${TAXI_ROOT}/taxi_utils.py"

--2019-06-20 08:31:20--  https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.108.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1922668 (1.8M) [text/plain]
Saving to: ‘/root/taxi/data/simple/data.csv’

     0K .......... .......... .......... .......... ..........  2% 2.19M 1s
    50K .......... .......... .......... .......... ..........  5% 5.52M 1s
   100K .......... .......... .......... .......... ..........  7% 8.17M 0s
   150K .......... .......... .......... .......... .......... 10% 5.29M 0s
   200K .......... .......... .......... .......... .......... 13% 7.05M 0s
   250K .......... .......... .......... .......... .......... 15% 3.62M 0s
   300K .......... .......... .......... .......... .......... 18% 5.18M 0s
   350K ........

## Imports

In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import logging
import os
from google.protobuf import json_format

from tfx.components.base.base_component import ComponentOutputs
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.components.example_validator.component import ExampleValidator
from tfx.components.model_validator.component import ModelValidator
from tfx.components.pusher.component import Pusher
from tfx.components.schema_gen.component import SchemaGen
from tfx.components.statistics_gen.component import StatisticsGen
from tfx.components.trainer.component import Trainer
from tfx.components.transform.component import Transform
from tfx.orchestration.airflow.airflow_runner import AirflowDAGRunner
from tfx.orchestration.pipeline import Pipeline
from tfx.orchestration.tfx_runner import TfxRunner
from tfx.proto import evaluator_pb2
from tfx.proto import example_gen_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.utils.dsl_utils import csv_input
from tfx.utils.channel import Channel
from tfx.utils import types

  'Running the Apache Beam SDK on Python 3 is not yet fully supported. '


## Configs

In [7]:
# Resolve the user's home directory once. os.path.expanduser('~') yields the
# same value as os.environ['HOME'] when HOME is set, but does not raise
# KeyError when it is not (it falls back to the platform's own lookup).
_home = os.path.expanduser('~')

# This example assumes that the taxi data is stored in ~/taxi/data and the
# taxi utility function is in ~/taxi.  Feel free to customize this as needed.
_taxi_root = os.path.join(_home, 'taxi')
_data_root = os.path.join(_taxi_root, 'data/simple')
# Python module file to inject customized logic into the TFX components. The
# Transform and Trainer both require user-defined functions to run successfully.
_taxi_module_file = os.path.join(_taxi_root, 'taxi_utils.py')

# Path which can be listened to by the model server.  Pusher will output the
# trained model here.
_serving_model_dir = os.path.join(_taxi_root, 'serving_model/taxi_simple')

# Directory and data locations.  This example assumes all of the chicago taxi
# example code and metadata library is relative to the home directory, but you
# can store these files anywhere on your local filesystem.
_tfx_root = os.path.join(_home, 'tfx')
_pipeline_root = os.path.join(_tfx_root, 'pipelines')
_metadata_db_root = os.path.join(_tfx_root, 'metadata')
_log_root = os.path.join(_tfx_root, 'logs')

# Airflow-specific configs; these will be passed directly to airflow.
_airflow_config = {
    'schedule_interval': None,
    'start_date': datetime.datetime(2019, 1, 1),
}

# Logging overrides: where logs are written and at which level.
logger_overrides = {'log_root': _log_root, 'log_level': logging.INFO}

## Create ExampleGen

In [8]:
# Implements the chicago taxi pipeline with TFX.

# Wrap the CSV data directory as the pipeline's external input.
examples = csv_input(_data_root)

# Brings data into the pipeline or otherwise joins/converts training data.
# The split is expressed in hash buckets: 2 for 'train' and 1 for 'eval'.
train_config = example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=2)
eval_config = example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
split_config = example_gen_pb2.SplitConfig(splits=[train_config, eval_config])
output_config = example_gen_pb2.Output(split_config=split_config)

# Output artifacts: one per split, each with an explicit location on disk.
train_examples = types.TfxType(type_name='ExamplesPath', split='train')
train_examples.uri = os.path.join(_data_root, 'csv_example_gen/train/')

eval_examples = types.TfxType(type_name='ExamplesPath', split='eval')
eval_examples.uri = os.path.join(_data_root, 'csv_example_gen/eval/')

# Output channels: both splits together, plus one channel per single split.
all_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples, eval_examples],
)
training_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples],
)
eval_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[eval_examples],
)

example_outputs = ComponentOutputs({
    'examples': all_examples_channel,
    'training_examples': training_examples_channel,
    'eval_examples': eval_examples_channel,
})

example_gen = CsvExampleGen(
    # A Channel of 'ExternalPath' type; it contains the path of the data source.
    input_base=examples,
    # An example_gen_pb2.Output instance; it contains the train-eval split ratio.
    output_config=output_config,
    # Dict from name to output channel; it is stored as example_gen.outputs.
    outputs=example_outputs,
)

## Create StatisticsGen

In [9]:
# Output artifacts: one statistics location per split.
train_statistics = types.TfxType(type_name='ExampleStatisticsPath', split='train')
train_statistics.uri = os.path.join(_data_root, 'statistics_gen/train/')

eval_statistics = types.TfxType(type_name='ExampleStatisticsPath', split='eval')
eval_statistics.uri = os.path.join(_data_root, 'statistics_gen/eval/')

# A single output channel carrying both the train and the eval statistics.
statistics_channel = Channel(
    type_name='ExampleStatisticsPath',
    static_artifact_collection=[train_statistics, eval_statistics],
)
statistics_outputs = ComponentOutputs({'output': statistics_channel})

statistics_gen = StatisticsGen(
    # A Channel of 'ExamplesPath' type; the 'examples' channel of example_gen.
    input_data=example_gen.outputs.examples,
    # Optional; the name should be unique if you are going to use multiple
    # StatisticsGen components in the same pipeline.
    name='Statistics Generator',
    # Dict from name to output channel; it is stored as statistics_gen.outputs.
    outputs=statistics_outputs,
)

## Create SchemaGen

In [10]:
# Output artifact: the location where the generated schema is written.
train_schema_path = types.TfxType(type_name='SchemaPath', split='train')
train_schema_path.uri = os.path.join(_data_root, 'schema_gen/')

# NOTE: SchemaGen.executor can handle JUST ONE SchemaPath.
# Two or more SchemaPaths will cause ValueError
# such as "ValueError: expected list length of one but got 2".
schema_channel = Channel(
    type_name='SchemaPath',
    static_artifact_collection=[train_schema_path],
)
schema_outputs = ComponentOutputs({'output': schema_channel})

infer_schema = SchemaGen(
    # A Channel of 'ExampleStatisticsPath' type; the 'output' channel of statistics_gen.
    stats=statistics_gen.outputs.output,
    # Optional; the name should be unique if you are going to use multiple
    # SchemaGen components in the same pipeline.
    name='Schema Generator',
    # Dict from name to output channel; it is stored as infer_schema.outputs.
    outputs=schema_outputs,
)

## Create Transform

In [11]:
# Output artifacts for the materialized, transformed examples (one per split).
# NOTE: this rebinds the names train_examples / eval_examples to new artifact
# objects that point at the transformed-example locations.
train_examples = types.TfxType(type_name='ExamplesPath', split='train')
train_examples.uri = os.path.join(
    _data_root, 'transform/transformed_examples/train/')

eval_examples = types.TfxType(type_name='ExamplesPath', split='eval')
eval_examples.uri = os.path.join(
    _data_root, 'transform/transformed_examples/eval/')

# Output artifact for the transform graph itself.
transform_output = types.TfxType(type_name='TransformPath')
transform_output.uri = os.path.join(
    _data_root, 'transform/transform_output/')

# Output of 'tf.Transform', which includes an exported TensorFlow graph
# suitable for both training and serving.
transform_graph_channel = Channel(
    type_name='TransformPath',
    static_artifact_collection=[transform_output],
)
# Materialized transformed examples, which include both the 'train' and the
# 'eval' splits.
transformed_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples, eval_examples],
)

transform_outputs = ComponentOutputs({
    'transform_output': transform_graph_channel,
    'transformed_examples': transformed_examples_channel,
})

transform = Transform(
    input_data=example_gen.outputs.examples,
    schema=infer_schema.outputs.output,
    # Module file providing the user-defined transform logic.
    module_file=_taxi_module_file,
    outputs=transform_outputs,
)

## Create Trainer

In [12]:
# Output artifact: the location where the trained model is exported.
model_exports = types.TfxType(type_name='ModelExportPath')
model_exports.uri = os.path.join(_data_root, 'trainer/current/')

model_export_channel = Channel(
    type_name='ModelExportPath',
    static_artifact_collection=[model_exports],
)
trainer_outputs = ComponentOutputs({'output': model_export_channel})

# Step budgets for training and for evaluation.
train_args = trainer_pb2.TrainArgs(num_steps=10000)
eval_args = trainer_pb2.EvalArgs(num_steps=5000)

trainer = Trainer(
    # Module file providing the user-defined training logic.
    module_file=_taxi_module_file,
    transformed_examples=transform.outputs.transformed_examples,
    schema=infer_schema.outputs.output,
    transform_output=transform.outputs.transform_output,
    train_args=train_args,
    eval_args=eval_args,
    outputs=trainer_outputs,
)


## Create Model Analyzer

In [13]:
# Output artifact: the location where the evaluation results are written.
eval_output = types.TfxType(type_name='ModelEvalPath')
eval_output.uri = os.path.join(_data_root, 'eval_output/')

model_eval_channel = Channel(
    type_name='ModelEvalPath',
    static_artifact_collection=[eval_output],
)
model_analyzer_outputs = ComponentOutputs({'output': model_eval_channel})

# Slice the evaluation by the 'trip_start_hour' column.
trip_start_hour_slice = evaluator_pb2.SingleSlicingSpec(
    column_for_slicing=['trip_start_hour'])
feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec(
    specs=[trip_start_hour_slice])

model_analyzer = Evaluator(
    examples=example_gen.outputs.examples,
    model_exports=trainer.outputs.output,
    feature_slicing_spec=feature_slicing_spec,
    outputs=model_analyzer_outputs,
)

## Create Model Validator

In [14]:
# Output artifacts: the blessing marker and the validation results.
blessing = types.TfxType(type_name='ModelBlessingPath')
blessing.uri = os.path.join(_data_root, 'model_validator/blessed/')

results = types.TfxType(type_name='ModelValidationPath')
results.uri = os.path.join(_data_root, 'model_validator/results/')

blessing_channel = Channel(
    type_name='ModelBlessingPath',
    static_artifact_collection=[blessing],
)
results_channel = Channel(
    type_name='ModelValidationPath',
    static_artifact_collection=[results],
)

model_validator_outputs = ComponentOutputs({
    'blessing': blessing_channel,
    'results': results_channel,
})

model_validator = ModelValidator(
    examples=example_gen.outputs.examples,
    model=trainer.outputs.output,
    outputs=model_validator_outputs,
)

## Create Pipeline

In [15]:
# Components are listed in the order in which they should be executed.
# NOTE(review): model_validator is created in this notebook but is not part of
# this list - presumably intentional; confirm.
pipeline_components = [
    example_gen,
    statistics_gen,
    infer_schema,
    transform,
    trainer,
    model_analyzer,
]

pipeline = Pipeline(
    pipeline_name="TFX Pipeline",
    pipeline_root=_pipeline_root,
    components=pipeline_components,
)

## Execute

In [16]:
class DirectRunner(TfxRunner):
    """Minimal TFX runner that executes every component in the local process.

    Components are executed one after another, in the order in which they
    appear in ``pipeline.components``, by instantiating each component's
    executor and calling its ``Do`` method directly.
    """

    def __init__(self, config=None):
        """Stores the optional runner configuration.

        Args:
            config: Optional configuration object. An empty dict is stored
                when a falsy value (such as None) is given.
        """
        self._config = config if config else {}

    def run(self, pipeline):
        """Executes all components of ``pipeline`` sequentially.

        Args:
            pipeline: Object exposing the components to run through its
                ``components`` attribute.

        Returns:
            The same ``pipeline`` object that was passed in.
        """
        for step in pipeline.components:
            self._execute_component(step)

        return pipeline

    def _execute_component(self, component):
        """Resolves the channels of one component and runs its executor.

        Args:
            component: Component whose ``input_dict`` and ``outputs`` channels
                are resolved with ``get()`` and handed, together with its
                ``exec_properties``, to a fresh executor instance.
        """
        # Resolve every input channel to the value returned by its get().
        resolved_inputs = {}
        for input_name, input_channel in component.input_dict.items():
            resolved_inputs[input_name] = input_channel.get()

        # Do the same for every declared output channel.
        resolved_outputs = {}
        for output_name, output_channel in component.outputs.get_all().items():
            resolved_outputs[output_name] = output_channel.get()

        properties = component.exec_properties
        component_executor = component.executor()
        component_executor.Do(resolved_inputs, resolved_outputs, properties)

In [17]:
# Execute every component of the pipeline locally, in order; run() returns the
# pipeline object it was given.
local_runner = DirectRunner()
pipeline = local_runner.run(pipeline)

INFO:tensorflow:Starting Executor execution.
[2019-06-20 08:31:24,167] {base_executor.py:72} INFO - Starting Executor execution.
INFO:tensorflow:Inputs for Executor is: {"input-base": [{"artifact": {"properties": {"split": {"stringValue": ""}, "type_name": {"stringValue": "ExternalPath"}}, "uri": "/root/taxi/data/simple"}, "artifact_type": {"properties": {"state": "STRING", "split": "STRING", "span": "INT", "name": "STRING", "type_name": "STRING"}, "name": "ExternalPath"}}]}
[2019-06-20 08:31:24,175] {base_executor.py:74} INFO - Inputs for Executor is: {"input-base": [{"artifact": {"properties": {"split": {"stringValue": ""}, "type_name": {"stringValue": "ExternalPath"}}, "uri": "/root/taxi/data/simple"}, "artifact_type": {"properties": {"state": "STRING", "split": "STRING", "span": "INT", "name": "STRING", "type_name": "STRING"}, "name": "ExternalPath"}}]}
INFO:tensorflow:Outputs for Executor is: {"eval_examples": [{"artifact": {"properties": {"split": {"stringValue": "eval"}, "type_n

[2019-06-20 08:31:31,980] {fn_api_runner.py:437} INFO - Running ((ShuffleSplittrain/ReshufflePerKey/GroupByKey/Read)+((ref_AppliedPTransform_ShuffleSplittrain/ReshufflePerKey/FlatMap(restore_timestamps)_34)+(ref_AppliedPTransform_ShuffleSplittrain/RemoveRandomKeys_35)))+((((ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/WriteBundles_42)+(ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/Pair_43))+(ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/WindowInto(WindowIntoFn)_44))+(OutputSplittrain/Write/WriteImpl/GroupByKey/Write))
[2019-06-20 08:31:32,444] {fn_api_runner.py:437} INFO - Running ((OutputSplittrain/Write/WriteImpl/GroupByKey/Read)+(ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/Extract_49))+(ref_PCollection_PCollection_32/Write)
[2019-06-20 08:31:32,458] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_24/Read)+((ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/PreFinalize_50)+(ref_PCollection_PCollection_33/Write))
[2019-06-

[2019-06-20 08:31:37,870] {fn_api_runner.py:437} INFO - Running (((((ref_AppliedPTransform_ReadData.eval/Read_106)+(ref_AppliedPTransform_DecodeData.eval/ParseTFExamples_108))+(ref_AppliedPTransform_GenerateStatistics.eval/RunStatsGenerators/KeyWithVoid_111))+((((ref_AppliedPTransform_GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/ParDo(SplitHotCold)/ParDo(SplitHotCold)_115)+((ref_AppliedPTransform_GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/WindowIntoDiscarding_116)+((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PreCombineFn)/Precombine)+(GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PreCombineFn)/Group/Write))))+(GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Flatten/Transcode/0))+(GenerateStatistics.eval/RunStatsGenerators/Gener

[2019-06-20 08:31:47,048] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PreCombineFn)/Group/Read)+((((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PreCombineFn)/Merge)+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PreCombineFn)/ExtractOutputs))+(ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Map(StripNonce)_21))+((ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/WindowIntoOriginal_22)+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Flatten/Write/1)))
[2019-06-20 08:31:47,298] {fn_api_runner.py:437} INFO - Running ((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/Ba

[2019-06-20 08:31:48,311] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Flatten/Read)+((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Precombine)+(GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Group/Write))
[2019-06-20 08:31:48,452] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Group/Read)+(((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Merge)+(GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/ExtractOutputs))+((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/FlattenFeatureSt

[2019-06-20 08:31:49,364] {base_executor.py:74} INFO - Inputs for Executor is: {"input_data": [{"artifact": {"properties": {"split": {"stringValue": "train"}, "type_name": {"stringValue": "ExamplesPath"}}, "uri": "/root/taxi/data/simple/csv_example_gen/train/"}, "artifact_type": {"properties": {"split": "STRING", "span": "INT", "type_name": "STRING", "name": "STRING", "state": "STRING"}, "name": "ExamplesPath"}}, {"artifact": {"properties": {"split": {"stringValue": "eval"}, "type_name": {"stringValue": "ExamplesPath"}}, "uri": "/root/taxi/data/simple/csv_example_gen/eval/"}, "artifact_type": {"properties": {"split": "STRING", "span": "INT", "type_name": "STRING", "name": "STRING", "state": "STRING"}, "name": "ExamplesPath"}}], "schema": [{"artifact": {"properties": {"split": {"stringValue": "train"}, "type_name": {"stringValue": "SchemaPath"}}, "uri": "/root/taxi/data/simple/schema_gen/"}, "artifact_type": {"properties": {"split": "STRING", "span": "INT", "type_name": "STRING", "name"

INFO:tensorflow:No assets to write.
[2019-06-20 08:31:50,623] {builder_impl.py:449} INFO - No assets to write.
INFO:tensorflow:SavedModel written to: /root/taxi/data/simple/transform/transform_output/.temp_path/tftransform_tmp/6dd54de0fa9e4fd88c5622d2adb3d541/saved_model.pb
[2019-06-20 08:31:50,680] {builder_impl.py:414} INFO - SavedModel written to: /root/taxi/data/simple/transform/transform_output/.temp_path/tftransform_tmp/6dd54de0fa9e4fd88c5622d2adb3d541/saved_model.pb
INFO:tensorflow:Assets added to graph.
[2019-06-20 08:31:53,027] {builder_impl.py:654} INFO - Assets added to graph.
INFO:tensorflow:No assets to write.
[2019-06-20 08:31:53,029] {builder_impl.py:449} INFO - No assets to write.
INFO:tensorflow:SavedModel written to: /root/taxi/data/simple/transform/transform_output/.temp_path/tftransform_tmp/1a148f09dce844b7b191a02f6ede33d6/saved_model.pb
[2019-06-20 08:31:53,065] {builder_impl.py:414} INFO - SavedModel written to: /root/taxi/data/simple/transform/transform_output/.t

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
[2019-06-20 08:32:00,430] {saver.py:1483} INFO - Saver not created because there are no variables in the graph to restore
[2019-06-20 08:32:01,355] {fn_api_runner.py:437} INFO - Running ((AnalyzeDataset/CacheableCombineAccumulate[bucketize_3/quantiles]/InitialCombineGlobally/CombinePerKey/Group/Read)+((AnalyzeDataset/CacheableCombineAccumulate[bucketize_3/quantiles]/InitialCombineGlobally/CombinePerKey/Merge)+(AnalyzeDataset/CacheableCombineAccumulate[bucketize_3/quantiles]/InitialCombineGlobally/CombinePerKey/ExtractOutputs)))+((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineAccumulate[bucketize_3/quantiles]/InitialCombineGlobally/UnKey_323)+(ref_PCollection_PCollection_201/Write))
[2019-06-20 08:32:01,493] {fn_api_runner.py:437} INFO - Running (((AnalyzeDataset/CacheableCombineAccumulate[bucketize/quantiles]/InitialCombineGlobally/CombinePerKey/Group/Read)+(AnalyzeDataset/CacheableCombineAc

[2019-06-20 08:32:01,885] {fn_api_runner.py:437} INFO - Running (AnalyzeDataset/VocabularyOrderAndFilter[compute_and_apply_vocabulary_1/vocabulary]/ApplyFrequencyThresholdAndTopK/Top(1000)/Flatten/Read)+(AnalyzeDataset/VocabularyOrderAndFilter[compute_and_apply_vocabulary_1/vocabulary]/ApplyFrequencyThresholdAndTopK/Top(1000)/GroupByKey/Write)
[2019-06-20 08:32:01,903] {fn_api_runner.py:437} INFO - Running ((AnalyzeDataset/VocabularyOrderAndFilter[compute_and_apply_vocabulary_1/vocabulary]/ApplyFrequencyThresholdAndTopK/Top(1000)/GroupByKey/Read)+(ref_AppliedPTransform_AnalyzeDataset/VocabularyOrderAndFilter[compute_and_apply_vocabulary_1/vocabulary]/ApplyFrequencyThresholdAndTopK/Top(1000)/ParDo(_MergeTopPerBundle)_191))+((ref_AppliedPTransform_AnalyzeDataset/VocabularyOrderAndFilter[compute_and_apply_vocabulary_1/vocabulary]/ApplyFrequencyThresholdAndTopK/FlattenList_192)+(ref_PCollection_PCollection_119/Write))
[2019-06-20 08:32:01,925] {fn_api_runner.py:437} INFO - Running (((ref_A

[2019-06-20 08:32:02,806] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_2/quantiles]/MergeCombinesGlobally/DoOnce/Read_307)+(ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_2/quantiles]/MergeCombinesGlobally/InjectDefault_308))+((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_2/quantiles]/ExtractOutputs/FlatMap(extract_outputs)_310)+((ref_AppliedPTransform_AnalyzeDataset/CreateTensorBinding[bucketize_2/quantiles/Placeholder]_311)+(AnalyzeDataset/CreateSavedModel/Flatten/Transcode/10))))+(AnalyzeDataset/CreateSavedModel/Flatten/Write/10)
[2019-06-20 08:32:02,842] {fn_api_runner.py:437} INFO - Running (AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Group/Read)+(((AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Merge)+(AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/

[2019-06-20 08:32:03,677] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_122/Read)+((ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary_1/vocabulary]/WriteToFile/Write/WriteImpl/PreFinalize_210)+(ref_PCollection_PCollection_130/Write))
[2019-06-20 08:32:03,700] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_122/Read)+((ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary_1/vocabulary]/WriteToFile/Write/WriteImpl/FinalizeWrite_211)+(ref_PCollection_PCollection_131/Write))
[2019-06-20 08:32:03,722] {filebasedsink.py:290} INFO - Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
[2019-06-20 08:32:03,827] {filebasedsink.py:327} INFO - Renamed 1 shards in 0.10 seconds.
[2019-06-20 08:32:03,849] {fn_api_runner.py:437} INFO - Running (ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary_1/vocabulary]/CreatePath/Read_213)+(((r

[2019-06-20 08:32:07,317] {fn_api_runner.py:437} INFO - Running ((Materialize[1]/Write/Write/WriteImpl/GroupByKey/Read)+(ref_AppliedPTransform_Materialize[1]/Write/Write/WriteImpl/Extract_432))+(ref_PCollection_PCollection_270/Write)
[2019-06-20 08:32:07,332] {fn_api_runner.py:437} INFO - Running ((ref_PCollection_PCollection_262/Read)+(ref_AppliedPTransform_Materialize[1]/Write/Write/WriteImpl/PreFinalize_433))+(ref_PCollection_PCollection_271/Write)
[2019-06-20 08:32:07,356] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_Materialize[0]/Write/Write/WriteImpl/DoOnce/Read_405)+(ref_AppliedPTransform_Materialize[0]/Write/Write/WriteImpl/InitializeWrite_406))+(ref_PCollection_PCollection_250/Write))+(ref_PCollection_PCollection_251/Write)
[2019-06-20 08:32:07,379] {fn_api_runner.py:437} INFO - Running ((((ref_AppliedPTransform_ReadTransformDataset[0]/Read/Read_365)+(ref_AppliedPTransform_ReadTransformDataset[0]/AddKey_366))+(ref_AppliedPTransform_ReadTransformDataset[0]/Pa

INFO:tensorflow:Outputs for Executor is: {"output": [{"artifact": {"properties": {"split": {"stringValue": ""}, "type_name": {"stringValue": "ModelExportPath"}}, "uri": "/root/taxi/data/simple/trainer/current/"}, "artifact_type": {"properties": {"split": "STRING", "span": "INT", "type_name": "STRING", "name": "STRING", "state": "STRING"}, "name": "ModelExportPath"}}]}
[2019-06-20 08:32:12,469] {base_executor.py:76} INFO - Outputs for Executor is: {"output": [{"artifact": {"properties": {"split": {"stringValue": ""}, "type_name": {"stringValue": "ModelExportPath"}}, "uri": "/root/taxi/data/simple/trainer/current/"}, "artifact_type": {"properties": {"split": "STRING", "span": "INT", "type_name": "STRING", "name": "STRING", "state": "STRING"}, "name": "ModelExportPath"}}]}
INFO:tensorflow:Execution properties for Executor is: {"custom_config": null, "train_args": "{\n  \"numSteps\": 10000\n}", "module_file": "/root/taxi/taxi_utils.py", "eval_args": "{\n  \"numSteps\": 5000\n}"}
[2019-06-2

[2019-06-20 08:32:20,738] {basic_session_run_hooks.py:247} INFO - loss = 18.37844, step = 701 (0.387 sec)
INFO:tensorflow:global_step/sec: 242.594
[2019-06-20 08:32:21,143] {basic_session_run_hooks.py:680} INFO - global_step/sec: 242.594
INFO:tensorflow:loss = 17.579018, step = 801 (0.410 sec)
[2019-06-20 08:32:21,148] {basic_session_run_hooks.py:247} INFO - loss = 17.579018, step = 801 (0.410 sec)
INFO:tensorflow:global_step/sec: 269.97
[2019-06-20 08:32:21,513] {basic_session_run_hooks.py:680} INFO - global_step/sec: 269.97
INFO:tensorflow:loss = 19.37462, step = 901 (0.370 sec)
[2019-06-20 08:32:21,518] {basic_session_run_hooks.py:247} INFO - loss = 19.37462, step = 901 (0.370 sec)
INFO:tensorflow:Saving checkpoints for 999 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
[2019-06-20 08:32:21,833] {basic_session_run_hooks.py:594} INFO - Saving checkpoints for 999 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
Instructions for updat

INFO:tensorflow:global_step/sec: 197.135
[2019-06-20 08:32:44,027] {basic_session_run_hooks.py:680} INFO - global_step/sec: 197.135
INFO:tensorflow:loss = 17.837294, step = 1501 (0.502 sec)
[2019-06-20 08:32:44,032] {basic_session_run_hooks.py:247} INFO - loss = 17.837294, step = 1501 (0.502 sec)
INFO:tensorflow:global_step/sec: 257.15
[2019-06-20 08:32:44,416] {basic_session_run_hooks.py:680} INFO - global_step/sec: 257.15
INFO:tensorflow:loss = 19.760386, step = 1601 (0.389 sec)
[2019-06-20 08:32:44,420] {basic_session_run_hooks.py:247} INFO - loss = 19.760386, step = 1601 (0.389 sec)
INFO:tensorflow:global_step/sec: 228.784
[2019-06-20 08:32:44,853] {basic_session_run_hooks.py:680} INFO - global_step/sec: 228.784
INFO:tensorflow:loss = 14.11869, step = 1701 (0.523 sec)
[2019-06-20 08:32:44,943] {basic_session_run_hooks.py:247} INFO - loss = 14.11869, step = 1701 (0.523 sec)
INFO:tensorflow:global_step/sec: 170.237
[2019-06-20 08:32:45,440] {basic_session_run_hooks.py:680} INFO - glo

INFO:tensorflow:loss = 12.892968, step = 3901 (0.502 sec)
[2019-06-20 08:32:56,177] {basic_session_run_hooks.py:247} INFO - loss = 12.892968, step = 3901 (0.502 sec)
INFO:tensorflow:Saving checkpoints for 3996 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
[2019-06-20 08:32:56,504] {basic_session_run_hooks.py:594} INFO - Saving checkpoints for 3996 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
[2019-06-20 08:32:56,770] {training.py:525} INFO - Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 155.317
[2019-06-20 08:32:56,812] {basic_session_run_hooks.py:680} INFO - global_step/sec: 155.317
INFO:tensorflow:loss = 15.549839, step = 4001 (0.647 sec)
[2019-06-20 08:32:56,824] {basic_session_run_hooks.py:247} INFO - loss = 15.549839, step = 4001 (0.647 sec)
INFO:tensorflow:global_step/sec: 216.427
[2019-06-2

[2019-06-20 08:33:05,341] {basic_session_run_hooks.py:680} INFO - global_step/sec: 338.185
INFO:tensorflow:loss = 19.382275, step = 6201 (0.298 sec)
[2019-06-20 08:33:05,350] {basic_session_run_hooks.py:247} INFO - loss = 19.382275, step = 6201 (0.298 sec)
INFO:tensorflow:global_step/sec: 338.919
[2019-06-20 08:33:05,636] {basic_session_run_hooks.py:680} INFO - global_step/sec: 338.919
INFO:tensorflow:loss = 15.390554, step = 6301 (0.292 sec)
[2019-06-20 08:33:05,642] {basic_session_run_hooks.py:247} INFO - loss = 15.390554, step = 6301 (0.292 sec)
INFO:tensorflow:global_step/sec: 326.147
[2019-06-20 08:33:05,943] {basic_session_run_hooks.py:680} INFO - global_step/sec: 326.147
INFO:tensorflow:loss = 16.34752, step = 6401 (0.307 sec)
[2019-06-20 08:33:05,949] {basic_session_run_hooks.py:247} INFO - loss = 16.34752, step = 6401 (0.307 sec)
INFO:tensorflow:global_step/sec: 286.887
[2019-06-20 08:33:06,291] {basic_session_run_hooks.py:680} INFO - global_step/sec: 286.887
INFO:tensorflow:l

INFO:tensorflow:loss = 12.187817, step = 8601 (0.489 sec)
[2019-06-20 08:33:13,758] {basic_session_run_hooks.py:247} INFO - loss = 12.187817, step = 8601 (0.489 sec)
INFO:tensorflow:global_step/sec: 185.194
[2019-06-20 08:33:14,280] {basic_session_run_hooks.py:680} INFO - global_step/sec: 185.194
INFO:tensorflow:loss = 12.776365, step = 8701 (0.535 sec)
[2019-06-20 08:33:14,292] {basic_session_run_hooks.py:247} INFO - loss = 12.776365, step = 8701 (0.535 sec)
INFO:tensorflow:global_step/sec: 207.658
[2019-06-20 08:33:14,761] {basic_session_run_hooks.py:680} INFO - global_step/sec: 207.658
INFO:tensorflow:loss = 14.660483, step = 8801 (0.474 sec)
[2019-06-20 08:33:14,766] {basic_session_run_hooks.py:247} INFO - loss = 14.660483, step = 8801 (0.474 sec)
INFO:tensorflow:global_step/sec: 335.02
[2019-06-20 08:33:15,060] {basic_session_run_hooks.py:680} INFO - global_step/sec: 335.02
INFO:tensorflow:loss = 15.654726, step = 8901 (0.302 sec)
[2019-06-20 08:33:15,068] {basic_session_run_hooks

INFO:tensorflow:Finished evaluation at 2019-06-20-08:33:44
[2019-06-20 08:33:44,498] {evaluation.py:277} INFO - Finished evaluation at 2019-06-20-08:33:44
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.79386, accuracy_baseline = 0.76983, auc = 0.94384927, auc_precision_recall = 0.7359488, average_loss = 0.33710176, global_step = 10000, label/mean = 0.23017, loss = 13.484071, precision = 0.7267409, prediction/mean = 0.22684945, recall = 0.16731112
[2019-06-20 08:33:44,507] {estimator.py:1979} INFO - Saving dict for global step 10000: accuracy = 0.79386, accuracy_baseline = 0.76983, auc = 0.94384927, auc_precision_recall = 0.7359488, average_loss = 0.33710176, global_step = 10000, label/mean = 0.23017, loss = 13.484071, precision = 0.7267409, prediction/mean = 0.22684945, recall = 0.16731112
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10000: /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt-10000
[2019-06-20 08:33:44,525] {estimator

INFO:tensorflow:Signatures INCLUDED in export for Eval: ['eval']
[2019-06-20 08:33:49,958] {export.py:587} INFO - Signatures INCLUDED in export for Eval: ['eval']
INFO:tensorflow:Restoring parameters from /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt-10000
[2019-06-20 08:33:50,220] {saver.py:1270} INFO - Restoring parameters from /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt-10000
INFO:tensorflow:Assets added to graph.
[2019-06-20 08:33:50,330] {builder_impl.py:654} INFO - Assets added to graph.
INFO:tensorflow:Assets written to: /root/taxi/data/simple/trainer/current/eval_model_dir/temp-b'1561019627'/assets
[2019-06-20 08:33:50,333] {builder_impl.py:763} INFO - Assets written to: /root/taxi/data/simple/trainer/current/eval_model_dir/temp-b'1561019627'/assets
INFO:tensorflow:SavedModel written to: /root/taxi/data/simple/trainer/current/eval_model_dir/temp-b'1561019627'/saved_model.pb
[2019-06-20 08:33:50,800] {builder_impl.py:414} INFO - Save

  | 'IncrementCounter' >> beam.Map(increment_counter))


[2019-06-20 08:33:51,808] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/plots)/WriteToTFRecord/Write/WriteImpl/DoOnce/Read_84)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/plots)/WriteToTFRecord/Write/WriteImpl/InitializeWrite_85))+(ref_PCollection_PCollection_43/Write))+(ref_PCollection_PCollection_44/Write)
[2019-06-20 08:33:51,832] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteEvalConfig(EvalConfig(model_location='/root/taxi/data/simple/trainer/current/eval_model_dir/1561019627', data_location='<user provided PCollection>', slice_spec=[SingleSliceSpec(columns=frozenset({'trip_start_hour'}), features=frozenset()), SingleSliceSpec(columns=frozenset(), features=frozenset())], example_weight_metric_key='post_export_metrics/example_count', num_bootstrap_samp

[2019-06-20 08:34:02,736] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/DoOnce/Read_68)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/InitializeWrite_69))+(ref_PCollection_PCollection_33/Write))+(ref_PCollection_PCollection_34/Write)
[2019-06-20 08:34:02,757] {fn_api_runner.py:437} INFO - Running ((ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/GroupByKey/Read)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/WriteBundles_76))+(ref_PCollection_PCollection_40/Write)
[2019-06-20 08:34:02,792] {fn_api_runner.py:437} INFO - Running ((ref_PCo

[2019-06-20 08:34:03,415] {fn_api_runner.py:437} INFO - Running (ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/Size/CombineGlobally(CountCombineFn)/CombinePerKey/Group/Read)+((ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/Size/CombineGlobally(CountCombineFn)/CombinePerKey/Merge)+(((ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/Size/CombineGlobally(CountCombineFn)/CombinePerKey/ExtractOutputs)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/Size/CombineGlobally(CountCombineFn)/UnKey_43))+(ref_PCollection_PCollection_20/Write)))
[2019-06-20 08:34:03,454] {fn_api_runner.py:437} INFO - Running (ref_AppliedPTransform_ExtractEval

## Check Result

In [18]:
# Recursively list (-R) everything under the pipeline data directory in long format (-l)
# with human-readable (-h) allocated sizes (-s), to see which artifacts were written.
!ls -Rlhs /root/taxi/data/simple/

/root/taxi/data/simple/:
total 1.9M
4.0K drwxr-xr-x 4 root root 4.0K Jun 20 08:31 csv_example_gen
1.9M -rw-r--r-- 1 root root 1.9M Jun 20 08:31 data.csv
4.0K drwxr-xr-x 2 root root 4.0K Jun 20 08:34 eval_output
4.0K drwxr-xr-x 2 root root 4.0K Jun 20 08:31 schema_gen
4.0K drwxr-xr-x 4 root root 4.0K Jun 20 08:31 statistics_gen
4.0K drwxr-xr-x 3 root root 4.0K Jun 20 08:32 trainer
4.0K drwxr-xr-x 4 root root 4.0K Jun 20 08:32 transform

/root/taxi/data/simple/csv_example_gen:
total 8.0K
4.0K drwxr-xr-x 2 root root 4.0K Jun 20 08:31 eval
4.0K drwxr-xr-x 2 root root 4.0K Jun 20 08:31 train

/root/taxi/data/simple/csv_example_gen/eval:
total 204K
204K -rw-r--r-- 1 root root 201K Jun 20 08:31 data_tfrecord-00000-of-00001.gz

/root/taxi/data/simple/csv_example_gen/train:
total 408K
408K -rw-r--r-- 1 root root 405K Jun 20 08:31 data_tfrecord-00000-of-00001.gz

/root/taxi/data/simple/eval_output:
total 16K
4.0K -rw-r--r-- 1 root root  506 Jun 20 08:34 eval_config
 12K

In [23]:
def get_eval_dir(model_analyzer):
    """Return the URI of the evaluation output produced by ``model_analyzer``.

    Fetches ``model_analyzer.outputs.output.get()`` and passes the result to
    ``types.get_single_uri``, returning whatever that helper returns.
    """
    # presumably .get() yields the output artifact(s) and get_single_uri expects
    # exactly one of them -- verify against the tfx ``types`` helper.
    return types.get_single_uri(model_analyzer.outputs.output.get())
    
# Resolve the evaluation output location once; the TFMA loading cell below reads it.
# NOTE(review): `model_analyzer` must already exist in the kernel (defined in an earlier cell).
eval_dir = get_eval_dir(model_analyzer)

In [30]:
# Load the evaluation result stored at `eval_dir` so the TFMA viewers below can render it.
import tensorflow_model_analysis as tfma
result = tfma.load_eval_result(eval_dir)

In [31]:
# Show the evaluation metrics broken down by the `trip_start_hour` slicing column.
tfma.view.render_slicing_metrics(result, slicing_column='trip_start_hour')

SlicingMetricsViewer(config={'weightedExamplesColumn': 'post_export_metrics/example_count'}, data=[{'metrics':…

In [33]:
# Render the plots for the single slice where trip_start_hour == 1.
# NOTE(review): in the recorded run this call raised
# "ValueError: No slice matching slicing spec is found." -- presumably the loaded
# result holds no plot data for this slice (or no plot data at all); confirm by
# inspecting `result.plots` and choose a slice that is actually present.
tfma.view.render_plot(result, tfma.slicer.SingleSliceSpec(features=[('trip_start_hour', 1)]))

ValueError: No slice matching slicing spec is found.

In [32]:
# Display the raw EvalResult repr (per-slice metric dictionaries) for inspection.
# NOTE(review): the repr is very long; consider showing only a few slices before sharing.
result

EvalResult(slicing_metrics=[((('trip_start_hour', 3),), {'label/mean': {'doubleValue': 0.2527472674846649}, 'precision': {'doubleValue': 0.0}, 'recall': {'doubleValue': 0.0}, 'auc_precision_recall': {'doubleValue': 0.6943898797035217}, 'accuracy_baseline': {'doubleValue': 0.7472527027130127}, 'average_loss': {'doubleValue': 0.3715084195137024}, 'auc': {'doubleValue': 0.9299871921539307}, 'prediction/mean': {'doubleValue': 0.20461934804916382}, 'post_export_metrics/example_count': {'doubleValue': 91.0}, 'accuracy': {'doubleValue': 0.7362637519836426}}), ((('trip_start_hour', 2),), {'accuracy': {'doubleValue': 0.8333333134651184}, 'precision': {'doubleValue': 1.0}, 'recall': {'doubleValue': 0.07999999821186066}, 'auc_precision_recall': {'doubleValue': 0.68250572681427}, 'accuracy_baseline': {'doubleValue': 0.8188405632972717}, 'average_loss': {'doubleValue': 0.3084699511528015}, 'post_export_metrics/example_count': {'doubleValue': 138.0}, 'prediction/mean': {'doubleValue': 0.199899137020