# Trainer Test Run

## Set up

TFX requires Apache Airflow and the Docker SDK for Python.


In [29]:
!pip install 'apache-airflow[gcp]' docker tfx





You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In this notebook, we use TFX version 0.13.0

In [30]:
import tfx
tfx.version.__version__

'0.13.0'

TFX requires TensorFlow >= 1.13.1

In [31]:
import tensorflow as tf
tf.__version__

'1.13.1'

TFX supports Python 3.5 starting with version 0.13.0.

In [32]:
import sys
sys.version

'3.5.2 (default, Nov 12 2018, 13:43:14) \n[GCC 5.4.0 20160609]'

## Download sample data

In [33]:
%%bash
# Abort on the first failing command so that a failed download is reported
# instead of being masked by the exit status of a later command.
set -euo pipefail

# Start from a clean data directory so this notebook can be run more than once.
# There must not be leftover train/eval files under ~/taxi/data, since TFX
# 0.13.0 can handle only a single input file.
rm -rf ~/taxi/data

# Download the taxi sample data.
mkdir -p ~/taxi/data/simple
mkdir -p ~/taxi/serving_model/taxi_simple
wget https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv -O ~/taxi/data/simple/data.csv

# Download the taxi utility module (used later as the module file for the
# Transform and Trainer components).
wget https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/taxi_utils.py -O ~/taxi/taxi_utils.py

--2019-06-25 11:22:15--  https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.108.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1922668 (1.8M) [text/plain]
Saving to: ‘/root/taxi/data/simple/data.csv’

     0K .......... .......... .......... .......... ..........  2% 3.76M 0s
    50K .......... .......... .......... .......... ..........  5% 4.83M 0s
   100K .......... .......... .......... .......... ..........  7% 6.48M 0s
   150K .......... .......... .......... .......... .......... 10% 5.43M 0s
   200K .......... .......... .......... .......... .......... 13% 5.97M 0s
   250K .......... .......... .......... .......... .......... 15% 5.88M 0s
   300K .......... .......... .......... .......... .......... 18% 10.1M 0s
   350K ........

## Import

In [34]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import logging
import os
from google.protobuf import json_format

from tfx.components.base.base_component import ComponentOutputs
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.components.example_validator.component import ExampleValidator
from tfx.components.model_validator.component import ModelValidator
from tfx.components.pusher.component import Pusher
from tfx.components.schema_gen.component import SchemaGen
from tfx.components.statistics_gen.component import StatisticsGen
from tfx.components.trainer.component import Trainer
from tfx.components.transform.component import Transform
from tfx.orchestration.airflow.airflow_runner import AirflowDAGRunner
from tfx.orchestration.pipeline import Pipeline
from tfx.orchestration.tfx_runner import TfxRunner
from tfx.proto import evaluator_pb2
from tfx.proto import example_gen_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.utils.dsl_utils import csv_input
from tfx.utils.channel import Channel
from tfx.utils import types

## Configs

In [35]:
# This example assumes that the taxi data is stored in ~/taxi/data and the
# taxi utility module is in ~/taxi.  Feel free to customize this as needed.
# The home directory is resolved with os.path.expanduser('~') so that it
# matches the `~` paths used by the download cell and does not raise KeyError
# when the HOME environment variable is unset.
_home = os.path.expanduser('~')
_taxi_root = os.path.join(_home, 'taxi')
_data_root = os.path.join(_taxi_root, 'data/simple')
# Python module file to inject customized logic into the TFX components. The
# Transform and Trainer both require user-defined functions to run successfully.
_taxi_module_file = os.path.join(_taxi_root, 'taxi_utils.py')

# Path which can be listened to by the model server.  Pusher will output the
# trained model here.
_serving_model_dir = os.path.join(_taxi_root, 'serving_model/taxi_simple')

# Directory and data locations.  This example assumes all of the chicago taxi
# example code and metadata library is relative to the home directory, but you
# can store these files anywhere on your local filesystem.
_tfx_root = os.path.join(_home, 'tfx')
_pipeline_root = os.path.join(_tfx_root, 'pipelines')
_metadata_db_root = os.path.join(_tfx_root, 'metadata')
_log_root = os.path.join(_tfx_root, 'logs')

# Airflow-specific configs; these will be passed directly to airflow
_airflow_config = {
    'schedule_interval': None,
    'start_date': datetime.datetime(2019, 1, 1),
}

# Logging overrides
logger_overrides = {'log_root': _log_root, 'log_level': logging.INFO}

## Create ExampleGen

In [36]:
"""Implements the chicago taxi pipeline with TFX."""
# Wrap the CSV data directory as the external input of the pipeline.
examples = csv_input(_data_root)

# Train/eval split configuration: 2 hash buckets for 'train', 1 for 'eval'.
train_config = example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=2)
eval_config = example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
split_config = example_gen_pb2.SplitConfig(splits=[train_config, eval_config])
output_config = example_gen_pb2.Output(split_config=split_config)

# Output artifacts, one per split, with fixed locations below the data root.
train_examples = types.TfxType(type_name='ExamplesPath', split='train')
train_examples.uri = os.path.join(_data_root, 'csv_example_gen/train/')
eval_examples = types.TfxType(type_name='ExamplesPath', split='eval')
eval_examples.uri = os.path.join(_data_root, 'csv_example_gen/eval/')

# One channel holding both splits, plus one channel per individual split.
all_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples, eval_examples])
training_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples])
eval_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[eval_examples])

example_outputs = ComponentOutputs({
    'examples': all_examples_channel,
    'training_examples': training_examples_channel,
    'eval_examples': eval_examples_channel,
})

# input_base:    channel of 'ExternalPath' type pointing at the data source.
# output_config: example_gen_pb2.Output instance carrying the split ratio.
# outputs:       mapping from output name to channel, passed to the component
#                as its outputs.
example_gen = CsvExampleGen(
    name="CSV ExampleGen Component",
    input_base=examples,
    output_config=output_config,
    outputs=example_outputs,
)

## Create StatisticsGen

In [37]:
# Output artifacts: one statistics location per split, below the data root.
train_statistics = types.TfxType(type_name='ExampleStatisticsPath', split='train')
train_statistics.uri = os.path.join(_data_root, 'statistics_gen/train/')
eval_statistics = types.TfxType(type_name='ExampleStatisticsPath', split='eval')
eval_statistics.uri = os.path.join(_data_root, 'statistics_gen/eval/')

statistics_channel = Channel(
    type_name='ExampleStatisticsPath',
    static_artifact_collection=[train_statistics, eval_statistics])
statistics_outputs = ComponentOutputs({'output': statistics_channel})

# name:       optional; should be unique if several StatisticsGen components
#             are used in the same pipeline.
# input_data: the 'examples' channel ('ExamplesPath' type) of the ExampleGen
#             component.
# outputs:    mapping from output name to channel, passed to the component as
#             its outputs.
statistics_gen = StatisticsGen(
    name='StatisticsGen Component',
    input_data=example_gen.outputs.examples,
    outputs=statistics_outputs,
)

## Create SchemaGen

In [38]:
# Output artifact: a single schema location below the data root.
train_schema_path = types.TfxType(type_name='SchemaPath', split='train')
train_schema_path.uri = os.path.join(_data_root, 'schema_gen/')

# NOTE: SchemaGen.executor can handle JUST ONE SchemaPath.
# Two or more SchemaPaths will cause a ValueError
# such as "ValueError: expected list length of one but got 2".
schema_channel = Channel(
    type_name='SchemaPath',
    static_artifact_collection=[train_schema_path])
schema_outputs = ComponentOutputs({'output': schema_channel})

# name:    optional; should be unique if several SchemaGen components are used
#          in the same pipeline.
# stats:   the 'output' channel ('ExampleStatisticsPath' type) of the
#          StatisticsGen component.
# outputs: mapping from output name to channel, passed to the component as its
#          outputs.
infer_schema = SchemaGen(
    name='SchemaGen Component',
    stats=statistics_gen.outputs.output,
    outputs=schema_outputs,
)

## Create Transform

In [39]:
# Locations of everything Transform writes, relative to the data root.
_transformed_examples_dir = 'transform/transformed_examples/'

# NOTE: this rebinds the notebook-level names `train_examples` and
# `eval_examples` to new artifacts for the transformed examples.
train_examples = types.TfxType(type_name='ExamplesPath', split='train')
train_examples.uri = os.path.join(
    _data_root, _transformed_examples_dir + 'train/')
eval_examples = types.TfxType(type_name='ExamplesPath', split='eval')
eval_examples.uri = os.path.join(
    _data_root, _transformed_examples_dir + 'eval/')
transform_output = types.TfxType(type_name='TransformPath')
transform_output.uri = os.path.join(_data_root, 'transform/transform_output/')

# Output of 'tf.Transform', which includes an exported TensorFlow graph
# suitable for both training and serving.
transform_output_channel = Channel(
    type_name='TransformPath',
    static_artifact_collection=[transform_output])
# Materialized transformed examples, covering both 'train' and 'eval' splits.
transformed_examples_channel = Channel(
    type_name='ExamplesPath',
    static_artifact_collection=[train_examples, eval_examples])

transform_outputs = ComponentOutputs({
    'transform_output': transform_output_channel,
    'transformed_examples': transformed_examples_channel,
})

# Transform consumes the raw examples and the schema; the user-defined logic is
# taken from the taxi module file.
transform = Transform(
    name="Transform Component",
    input_data=example_gen.outputs.examples,
    schema=infer_schema.outputs.output,
    module_file=_taxi_module_file,
    outputs=transform_outputs,
)

## Create Trainer

In [40]:
# Output artifact: where the trained model is exported, below the data root.
model_exports = types.TfxType(type_name='ModelExportPath')
model_exports.uri = os.path.join(_data_root, 'trainer/current/')

model_export_channel = Channel(
    type_name='ModelExportPath',
    static_artifact_collection=[model_exports])
trainer_outputs = ComponentOutputs({'output': model_export_channel})

# Step budgets handed to the Trainer for training and evaluation.
train_args = trainer_pb2.TrainArgs(num_steps=10000)
eval_args = trainer_pb2.EvalArgs(num_steps=5000)

# Trainer reads the transformed examples, the schema and the transform output;
# the user-defined training logic is taken from the taxi module file.
trainer = Trainer(
    name='Trainer Component',
    module_file=_taxi_module_file,
    transformed_examples=transform.outputs.transformed_examples,
    schema=infer_schema.outputs.output,
    transform_output=transform.outputs.transform_output,
    train_args=train_args,
    eval_args=eval_args,
    outputs=trainer_outputs,
)


## Create Model Analyzer

In [41]:
# Output artifact: where the evaluation results are written, below the data root.
eval_output = types.TfxType(type_name='ModelEvalPath')
eval_output.uri = os.path.join(_data_root, 'eval_output/')

eval_output_channel = Channel(
    type_name='ModelEvalPath',
    static_artifact_collection=[eval_output])
model_analyzer_outputs = ComponentOutputs({'output': eval_output_channel})

# Slice the evaluation by the 'trip_start_hour' column.
trip_start_hour_slice = evaluator_pb2.SingleSlicingSpec(
    column_for_slicing=['trip_start_hour'])
feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec(
    specs=[trip_start_hour_slice])

# Evaluator reads the examples from ExampleGen and the model exported by the
# Trainer.
model_analyzer = Evaluator(
    name='Evaluator Component',
    examples=example_gen.outputs.examples,
    model_exports=trainer.outputs.output,
    feature_slicing_spec=feature_slicing_spec,
    outputs=model_analyzer_outputs,
)

In [42]:
def _Do(self, input_dict, output_dict, exec_properties):
    """Runs a batch job to evaluate the eval_model against the given input.

    In addition to the slicing configuration, the evaluation registers the
    calibration-plot/prediction-histogram and AUC-plot post-export metrics.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data.

    Returns:
      None

    Raises:
      ValueError: if 'model_exports' or 'examples' is missing from input_dict,
        or 'output' is missing from output_dict.
    """
    # Validate the arguments first so that bad input fails fast, before the
    # heavy dependencies below are loaded.
    if 'model_exports' not in input_dict:
        raise ValueError('\'model_exports\' is missing in input dict.')
    if 'examples' not in input_dict:
        raise ValueError('\'examples\' is missing in input dict.')
    if 'output' not in output_dict:
        raise ValueError('\'output\' is missing in output dict.')

    # Function-local imports: every external name used by this body.
    import apache_beam as beam
    import tensorflow as tf
    import tensorflow_model_analysis as tfma
    from google.protobuf import json_format
    from tfx.proto import evaluator_pb2
    from tfx.utils import io_utils
    from tfx.utils import path_utils
    from tfx.utils import types

    self._log_startup(input_dict, output_dict, exec_properties)

    # Extract input artifacts
    model_exports_uri = types.get_single_uri(input_dict['model_exports'])

    # The slicing spec arrives as a JSON string; parse it back into a proto.
    feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'],
                      feature_slicing_spec)
    slice_spec = self._get_slice_spec_from_feature_slicing_spec(
        feature_slicing_spec)

    output_uri = types.get_single_uri(output_dict['output'])

    eval_model_path = path_utils.eval_model_path(model_exports_uri)

    tf.logging.info('Using {} for model eval.'.format(eval_model_path))
    eval_shared_model = tfma.default_eval_shared_model(
        add_metrics_callbacks=[
            # calibration_plot_and_prediction_histogram computes calibration
            # plot and prediction distribution at different thresholds.
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
            # auc_plots enables precision-recall curve and ROC visualization
            # at different thresholds.
            tfma.post_export_metrics.auc_plots()
        ],
        eval_saved_model_path=eval_model_path)

    tf.logging.info('Evaluating model.')
    with beam.Pipeline(argv=self._get_beam_pipeline_args()) as pipeline:
        # Only the 'eval' split of the examples is read for evaluation.
        # pylint: disable=expression-not-assigned
        (pipeline
         | 'ReadData' >> beam.io.ReadFromTFRecord(
             file_pattern=io_utils.all_files_pattern(
                 types.get_split_uri(input_dict['examples'], 'eval')))
         |
         'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=eval_shared_model,
             slice_spec=slice_spec,
             output_path=output_uri))
    tf.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))

# Install the customised `_Do` defined above as the `Do` attribute of the
# Evaluator component's executor, so it is the implementation invoked when the
# component is executed.
model_analyzer.executor.Do = _Do

## Create Model Validator

In [43]:
# Output artifacts: the blessing and the validation results, both below the
# data root.
blessing = types.TfxType(type_name='ModelBlessingPath')
blessing.uri = os.path.join(_data_root, 'model_validator/blessed/')
results = types.TfxType(type_name='ModelValidationPath')
results.uri = os.path.join(_data_root, 'model_validator/results/')

blessing_channel = Channel(
    type_name='ModelBlessingPath',
    static_artifact_collection=[blessing])
results_channel = Channel(
    type_name='ModelValidationPath',
    static_artifact_collection=[results])

model_validator_outputs = ComponentOutputs({
    'blessing': blessing_channel,
    'results': results_channel,
})

# ModelValidator reads the examples from ExampleGen and the model exported by
# the Trainer.
model_validator = ModelValidator(
    name='Model Validator Component',
    examples=example_gen.outputs.examples,
    model=trainer.outputs.output,
    outputs=model_validator_outputs,
)

## Create Pusher

In [44]:
# Extra configuration handed to the Pusher.
# If custom_config contains 'cmle_serving_args', Pusher will try to push the
# ML model to AI Platform. However, this config will be deprecated in the next
# release of TFX.
# To run it locally, we use an empty dictionary.
custom_config = {}

pusher = Pusher(
    name='Pusher Component',
    model_export=trainer.outputs.output,
    model_blessing=model_validator.outputs.blessing,
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=_serving_model_dir)),
    # No explicit output channels are supplied: pass None rather than an empty
    # string, which is not a valid outputs object.
    # NOTE(review): assumes Pusher creates its default outputs when given
    # None -- confirm against tfx 0.13.0.
    outputs=None,
    custom_config=custom_config,
)

## Create Pipeline

In [45]:
# Components in the order they must run: the DirectRunner defined in this
# notebook executes them sequentially in exactly this order.
pipeline_components = [
    example_gen,
    statistics_gen,
    infer_schema,
    transform,
    trainer,
    model_analyzer,
    model_validator,
    pusher,
]

pipeline = Pipeline(
    pipeline_name="TFX Pipeline",
    pipeline_root=_pipeline_root,
    components=pipeline_components,
)

## Execute

In [46]:
class DirectRunner(TfxRunner):
    """Minimal in-process TFX runner.

    Runs each component's executor directly, one after another, in the order
    the components appear in the pipeline.
    """

    def __init__(self, config=None):
        """Stores the optional runner config (an empty dict when not given)."""
        self._config = config if config else {}

    def run(self, pipeline):
        """Executes every component of `pipeline` in order.

        Args:
          pipeline: object exposing a `components` iterable.

        Returns:
          The same `pipeline` object that was passed in.
        """
        for step in pipeline.components:
            self._execute_component(step)

        return pipeline

    def _execute_component(self, component):
        """Resolves a component's channels and calls its executor's `Do`."""
        # Each channel's `get()` result is what the executor receives.
        inputs = dict(
            (name, channel.get())
            for name, channel in component.input_dict.items())
        outputs = dict(
            (name, channel.get())
            for name, channel in component.outputs.get_all().items())
        properties = component.exec_properties

        runner = component.executor()
        runner.Do(inputs, outputs, properties)

In [47]:
# Execute all components in-process. `run()` returns the pipeline it was given,
# so `pipeline` is rebound to the same object.
pipeline = DirectRunner().run(pipeline)

INFO:tensorflow:Starting Executor execution.
[2019-06-25 11:22:19,975] {base_executor.py:72} INFO - Starting Executor execution.
INFO:tensorflow:Inputs for Executor is: {"input-base": [{"artifact": {"properties": {"type_name": {"stringValue": "ExternalPath"}, "split": {"stringValue": ""}}, "uri": "/root/taxi/data/simple"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "ExternalPath"}}]}
[2019-06-25 11:22:19,983] {base_executor.py:74} INFO - Inputs for Executor is: {"input-base": [{"artifact": {"properties": {"type_name": {"stringValue": "ExternalPath"}, "split": {"stringValue": ""}}, "uri": "/root/taxi/data/simple"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "ExternalPath"}}]}
INFO:tensorflow:Outputs for Executor is: {"examples": [{"artifact": {"properties": {"type_name": {"stringValue": "ExamplesPath"}, 

[2019-06-25 11:22:27,625] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_OutputSpliteval/Write/WriteImpl/DoOnce/Read_66)+(ref_AppliedPTransform_OutputSpliteval/Write/WriteImpl/InitializeWrite_67))+(ref_PCollection_PCollection_42/Write))+(ref_PCollection_PCollection_43/Write)
[2019-06-25 11:22:27,643] {fn_api_runner.py:437} INFO - Running (((ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/DoOnce/Read_40)+(ref_AppliedPTransform_OutputSplittrain/Write/WriteImpl/InitializeWrite_41))+(ref_PCollection_PCollection_24/Write))+(ref_PCollection_PCollection_25/Write)
[2019-06-25 11:22:27,660] {fn_api_runner.py:437} INFO - Running ((ShuffleSpliteval/ReshufflePerKey/GroupByKey/Read)+(ref_AppliedPTransform_ShuffleSpliteval/ReshufflePerKey/FlatMap(restore_timestamps)_60))+(((ref_AppliedPTransform_ShuffleSpliteval/RemoveRandomKeys_61)+(ref_AppliedPTransform_OutputSpliteval/Write/WriteImpl/WriteBundles_68))+(((ref_AppliedPTransform_OutputSpliteval/Write/WriteImpl/Pair_69)+(ref_Ap

[2019-06-25 11:22:33,265] {fn_api_runner.py:437} INFO - Running (((((((ref_AppliedPTransform_ReadData.train/Read_106)+((((ref_AppliedPTransform_DecodeData.train/ParseTFExamples_108)+(ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/KeyWithVoid_111))+(ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/ParDo(SplitHotCold)/ParDo(SplitHotCold)_115))+(ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/TopKUniques_ConvertInputToFeatureValuesWithWeights_135)))+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Flatten/Transcode/0))+(ref_AppliedPTransform_GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/WindowIntoDiscarding_116))+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/Flatten/Write/0))+((GenerateStatistics

[2019-06-25 11:22:39,274] {fn_api_runner.py:437} INFO - Running ((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Group/Read)+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/Merge))+((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/BasicStatsGenerator/CombinePerKey(PostCombineFn)/ExtractOutputs)+((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/FlattenFeatureStatistics/Transcode/0)+(GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/FlattenFeatureStatistics/Write/0)))
[2019-06-25 11:22:39,560] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/FlattenFeatureStatistics/Read)+((GenerateStatistics.train/RunStatsGenerators/GenerateSlicedStatisticsImpl/MergeDatasetFeatureStatisticsProtos/Precombine)+(GenerateStati

[2019-06-25 11:22:43,520] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/Uniques_CountPerFeatureName/CombinePerKey(CountCombineFn)/Group/Read)+((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/Uniques_CountPerFeatureName/CombinePerKey(CountCombineFn)/Merge)+((GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/Uniques_CountPerFeatureName/CombinePerKey(CountCombineFn)/ExtractOutputs)+((ref_AppliedPTransform_GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/Uniques_ConvertToSingleFeatureStats_63)+(GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatisticsImpl/TopKUniquesStatsGenerator/FlattenTopKUniquesResults/Write/1))))
[2019-06-25 11:22:43,539] {fn_api_runner.py:437} INFO - Running (GenerateStatistics.eval/RunStatsGenerators/GenerateSlicedStatist

[2019-06-25 11:22:44,219] {base_executor.py:74} INFO - Inputs for Executor is: {"schema": [{"artifact": {"properties": {"type_name": {"stringValue": "SchemaPath"}, "split": {"stringValue": "train"}}, "uri": "/root/taxi/data/simple/schema_gen/"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "SchemaPath"}}], "input_data": [{"artifact": {"properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "train"}}, "uri": "/root/taxi/data/simple/csv_example_gen/train/"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "ExamplesPath"}}, {"artifact": {"properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "eval"}}, "uri": "/root/taxi/data/simple/csv_example_gen/eval/"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span

[2019-06-25 11:22:51,586] {fn_api_runner.py:437} INFO - Running ((ref_AppliedPTransform_ReadAnalysisDataset[0]/Read/Read_4)+(ref_AppliedPTransform_ReadAnalysisDataset[0]/AddKey_5))+((ref_AppliedPTransform_ReadAnalysisDataset[0]/ParseExamples_6)+((ref_AppliedPTransform_DecodeAnalysisDataset[0]/ApplyDecodeFn_8)+(FlattenAnalysisDatasets/Write/0)))
[2019-06-25 11:22:53,161] {fn_api_runner.py:437} INFO - Running (ref_AppliedPTransform_AnalyzeDataset/CreateSavedModelForAnalyzerInputs[0]/CreateSavedModel/Read_13)+(ref_PCollection_PCollection_6/Write)
[2019-06-25 11:22:53,189] {fn_api_runner.py:437} INFO - Running (((((((((((((((((FlattenAnalysisDatasets/Read)+(ref_AppliedPTransform_AnalyzeDataset/ApplySavedModel[0]/BatchInputs/BatchElements/ParDo(_GlobalWindowsBatchingDoFn)_17))+((((((((((((((ref_AppliedPTransform_AnalyzeDataset/ApplySavedModel[0]/ApplySavedModel_18)+(ref_AppliedPTransform_AnalyzeDataset/TensorSource[scale_to_z_score/mean_and_var]_19))+(ref_AppliedPTransform_AnalyzeDataset/Te

[2019-06-25 11:22:54,906] {fn_api_runner.py:437} INFO - Running (AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/CombinePerKey/Group/Read)+(((AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/CombinePerKey/Merge)+(AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/CombinePerKey/ExtractOutputs))+((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/UnKey_273)+(ref_PCollection_PCollection_169/Write)))
[2019-06-25 11:22:55,043] {fn_api_runner.py:437} INFO - Running ((((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/DoOnce/Read_275)+(ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/MergeCombinesGlobally/InjectDefault_276))+(ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[bucketize_1/quantiles]/ExtractOutputs/FlatMap(extract_outputs)_278))+(ref_

[2019-06-25 11:22:55,623] {fn_api_runner.py:437} INFO - Running (((AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Group/Read)+(AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Merge))+(AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/CombinePerKey/ExtractOutputs))+((((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/MergeCombinesGlobally/UnKey_41)+(((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[scale_to_z_score/mean_and_var]/ExtractOutputs/FlatMap(extract_outputs)_43)+(ref_AppliedPTransform_AnalyzeDataset/CreateTensorBinding[scale_to_z_score/mean_and_var/Placeholder]_44))+((ref_AppliedPTransform_AnalyzeDataset/CreateTensorBinding[scale_to_z_score/mean_and_var/Placeholder_1]_45)+(AnalyzeDataset/CreateSavedModel/Flatten/Transcode/3))))+((AnalyzeDataset/CreateSavedModel/Flatten/Transc

[2019-06-25 11:22:56,642] {fn_api_runner.py:437} INFO - Running (AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary/vocabulary]/WriteToFile/Write/WriteImpl/GroupByKey/Read)+((ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary/vocabulary]/WriteToFile/Write/WriteImpl/WriteBundles_151)+(ref_PCollection_PCollection_93/Write))
[2019-06-25 11:22:56,667] {fn_api_runner.py:437} INFO - Running ((ref_PCollection_PCollection_86/Read)+(ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary/vocabulary]/WriteToFile/Write/WriteImpl/PreFinalize_152))+(ref_PCollection_PCollection_94/Write)
[2019-06-25 11:22:56,697] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_86/Read)+((ref_AppliedPTransform_AnalyzeDataset/VocabularyWrite[compute_and_apply_vocabulary/vocabulary]/WriteToFile/Write/WriteImpl/FinalizeWrite_153)+(ref_PCollection_PCollection_95/Write))
[2019-06-25 11:22:56,715] {filebasedsink.py:290} INFO - Starting fin

[2019-06-25 11:22:57,329] {fn_api_runner.py:437} INFO - Running ((((((AnalyzeDataset/CacheableCombineMerge[scale_to_z_score_1/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Group/Read)+(AnalyzeDataset/CacheableCombineMerge[scale_to_z_score_1/mean_and_var]/MergeCombinesGlobally/CombinePerKey/Merge))+((AnalyzeDataset/CacheableCombineMerge[scale_to_z_score_1/mean_and_var]/MergeCombinesGlobally/CombinePerKey/ExtractOutputs)+((ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[scale_to_z_score_1/mean_and_var]/MergeCombinesGlobally/UnKey_68)+(ref_AppliedPTransform_AnalyzeDataset/CacheableCombineMerge[scale_to_z_score_1/mean_and_var]/ExtractOutputs/FlatMap(extract_outputs)_70))))+(ref_AppliedPTransform_AnalyzeDataset/CreateTensorBinding[scale_to_z_score_1/mean_and_var/Placeholder]_71))+((AnalyzeDataset/CreateSavedModel/Flatten/Transcode/4)+(AnalyzeDataset/CreateSavedModel/Flatten/Write/4)))+((ref_AppliedPTransform_AnalyzeDataset/CreateTensorBinding[scale_to_z_score_1/mean_and_var/P

value: "\n\013\n\tConst_9:0\022/vocab_compute_and_apply_vocabulary_1_vocabulary"

value: "\n\013\n\tConst_9:0\022/vocab_compute_and_apply_vocabulary_1_vocabulary"

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
[2019-06-25 11:23:02,249] {saver.py:1483} INFO - Saver not created because there are no variables in the graph to restore
[2019-06-25 11:23:04,312] {fn_api_runner.py:437} INFO - Running (Materialize[1]/Write/Write/WriteImpl/GroupByKey/Read)+((ref_AppliedPTransform_Materialize[1]/Write/Write/WriteImpl/Extract_432)+(ref_PCollection_PCollection_270/Write))
[2019-06-25 11:23:04,329] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_262/Read)+((ref_AppliedPTransform_Materialize[1]/Write/Write/WriteImpl/PreFinalize_433)+(ref_PCollection_PCollection_271/Write))
[2019-06-25 11:23:04,354] {fn_api_runner.py:437} INFO - Running (Materialize[0]/Write/Write/WriteImpl/GroupByKey/Read)+((ref_AppliedPTransform_Materialize[0]/Write/Write

[2019-06-25 11:23:04,909] {estimator.py:201} INFO - Using config: {'_save_summary_steps': 100, '_keep_checkpoint_max': 1, '_protocol': None, '_is_chief': True, '_save_checkpoints_secs': None, '_service': None, '_evaluation_master': '', '_device_fn': None, '_model_dir': '/root/taxi/data/simple/trainer/current/serving_model_dir', '_save_checkpoints_steps': 999, '_task_id': 0, '_master': '', '_tf_random_seed': None, '_experimental_distribute': None, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_global_id_in_cluster': 0, '_train_distribute': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe776436a20>, '_eval_distribute': None, '_log_step_count_steps': 100}
INFO:tensorflow:Training model.
[2019-06-25 11:23:04,929] {executor.py:141} INFO - Training model.
INFO

[2019-06-25 11:23:29,299] {evaluation.py:169} INFO - Evaluation [3500/5000]
INFO:tensorflow:Evaluation [4000/5000]
[2019-06-25 11:23:30,736] {evaluation.py:169} INFO - Evaluation [4000/5000]
INFO:tensorflow:Evaluation [4500/5000]
[2019-06-25 11:23:32,133] {evaluation.py:169} INFO - Evaluation [4500/5000]
INFO:tensorflow:Evaluation [5000/5000]
[2019-06-25 11:23:33,413] {evaluation.py:169} INFO - Evaluation [5000/5000]
INFO:tensorflow:Finished evaluation at 2019-06-25-11:23:33
[2019-06-25 11:23:33,506] {evaluation.py:277} INFO - Finished evaluation at 2019-06-25-11:23:33
INFO:tensorflow:Saving dict for global step 999: accuracy = 0.769535, accuracy_baseline = 0.769735, auc = 0.9024479, auc_precision_recall = 0.63227355, average_loss = 0.45378688, global_step = 999, label/mean = 0.230265, loss = 18.151476, precision = 0.0, prediction/mean = 0.22809385, recall = 0.0
[2019-06-25 11:23:33,510] {estimator.py:1979} INFO - Saving dict for global step 999: accuracy = 0.769535, accuracy_baseline 

INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
[2019-06-25 11:23:39,687] {training.py:525} INFO - Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 185.237
[2019-06-25 11:23:39,720] {basic_session_run_hooks.py:680} INFO - global_step/sec: 185.237
INFO:tensorflow:loss = 15.338256, step = 3001 (0.542 sec)
[2019-06-25 11:23:39,729] {basic_session_run_hooks.py:247} INFO - loss = 15.338256, step = 3001 (0.542 sec)
INFO:tensorflow:global_step/sec: 377.675
[2019-06-25 11:23:39,985] {basic_session_run_hooks.py:680} INFO - global_step/sec: 377.675
INFO:tensorflow:loss = 13.534388, step = 3101 (0.260 sec)
[2019-06-25 11:23:39,988] {basic_session_run_hooks.py:247} INFO - loss = 13.534388, step = 3101 (0.260 sec)
INFO:tensorflow:global_step/sec: 330.926
[2019-06-25 11:23:40,287] {basic_session_run_hooks.py:680} INFO - global_step/sec: 330.926
INFO:tensorflow:loss = 14.332558, step = 3201 (0.303 sec)
[2019-06-25 11:

[2019-06-25 11:23:47,298] {basic_session_run_hooks.py:247} INFO - loss = 14.322586, step = 5301 (0.288 sec)
INFO:tensorflow:global_step/sec: 333.118
[2019-06-25 11:23:47,588] {basic_session_run_hooks.py:680} INFO - global_step/sec: 333.118
INFO:tensorflow:loss = 14.110179, step = 5401 (0.297 sec)
[2019-06-25 11:23:47,595] {basic_session_run_hooks.py:247} INFO - loss = 14.110179, step = 5401 (0.297 sec)
INFO:tensorflow:global_step/sec: 339.706
[2019-06-25 11:23:47,882] {basic_session_run_hooks.py:680} INFO - global_step/sec: 339.706
INFO:tensorflow:loss = 17.12027, step = 5501 (0.305 sec)
[2019-06-25 11:23:47,900] {basic_session_run_hooks.py:247} INFO - loss = 17.12027, step = 5501 (0.305 sec)
INFO:tensorflow:global_step/sec: 336.226
[2019-06-25 11:23:48,179] {basic_session_run_hooks.py:680} INFO - global_step/sec: 336.226
INFO:tensorflow:loss = 14.580484, step = 5601 (0.283 sec)
[2019-06-25 11:23:48,183] {basic_session_run_hooks.py:247} INFO - loss = 14.580484, step = 5601 (0.283 sec)


INFO:tensorflow:global_step/sec: 358.419
[2019-06-25 11:23:55,129] {basic_session_run_hooks.py:680} INFO - global_step/sec: 358.419
INFO:tensorflow:loss = 13.523454, step = 7801 (0.282 sec)
[2019-06-25 11:23:55,137] {basic_session_run_hooks.py:247} INFO - loss = 13.523454, step = 7801 (0.282 sec)
INFO:tensorflow:global_step/sec: 415.111
[2019-06-25 11:23:55,370] {basic_session_run_hooks.py:680} INFO - global_step/sec: 415.111
INFO:tensorflow:loss = 17.571627, step = 7901 (0.237 sec)
[2019-06-25 11:23:55,375] {basic_session_run_hooks.py:247} INFO - loss = 17.571627, step = 7901 (0.237 sec)
INFO:tensorflow:Saving checkpoints for 7992 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
[2019-06-25 11:23:55,695] {basic_session_run_hooks.py:594} INFO - Saving checkpoints for 7992 into /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
[2019-06-25 11:23:55,922] {training.

INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
[2019-06-25 11:24:02,405] {training.py:525} INFO - Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:Calling model_fn.
[2019-06-25 11:24:02,492] {estimator.py:1111} INFO - Calling model_fn.
INFO:tensorflow:Done calling model_fn.
[2019-06-25 11:24:04,391] {estimator.py:1113} INFO - Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-25T11:24:04Z
[2019-06-25 11:24:04,423] {evaluation.py:257} INFO - Starting evaluation at 2019-06-25T11:24:04Z
INFO:tensorflow:Graph was finalized.
[2019-06-25 11:24:04,672] {monitored_session.py:222} INFO - Graph was finalized.
INFO:tensorflow:Restoring parameters from /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt-10000
[2019-06-25 11:24:04,677] {saver.py:1270} INFO - Restoring parameters from /root/taxi/data/simple/trainer/current/serving_model_dir/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
[2

value: "\n\013\n\tConst_9:0\022/vocab_compute_and_apply_vocabulary_1_vocabulary"

value: "\n\013\n\tConst_9:0\022/vocab_compute_and_apply_vocabulary_1_vocabulary"

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
[2019-06-25 11:24:24,195] {saver.py:1483} INFO - Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Calling model_fn.
[2019-06-25 11:24:24,270] {estimator.py:1111} INFO - Calling model_fn.
INFO:tensorflow:Done calling model_fn.
[2019-06-25 11:24:26,830] {estimator.py:1113} INFO - Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
[2019-06-25 11:24:26,838] {export.py:587} INFO - Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Train: None
[2019-06-25 11:24:26,845] {export.py:587} INFO - Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: ['eval']
[2019-06-25 11:24:26

  | 'IncrementCounter' >> beam.Map(increment_counter))


[2019-06-25 11:24:30,022] {fn_api_runner.py:437} INFO - Running ((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/DoOnce/Read_68)+((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/InitializeWrite_69)+(ref_PCollection_PCollection_34/Write)))+(ref_PCollection_PCollection_33/Write)
[2019-06-25 11:24:30,047] {fn_api_runner.py:437} INFO - Running (((((((ref_AppliedPTransform_ReadData/Read_3)+((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/InputsToExtracts/Map(<lambda at model_eval_lib.py:393>)_6)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/ExtractAndEvaluate/Predict/Batch/ParDo(_GlobalWindowsBatchingDoFn)_10)))+(((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/ExtractAndEvaluate/Predict/Predict_11)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/Extr

[2019-06-25 11:24:59,599] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_33/Read)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/metrics)/WriteToTFRecord/Write/WriteImpl/FinalizeWrite_78)
[2019-06-25 11:24:59,616] {filebasedsink.py:290} INFO - Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
[2019-06-25 11:24:59,720] {filebasedsink.py:327} INFO - Renamed 1 shards in 0.10 seconds.
[2019-06-25 11:24:59,745] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_43/Read)+((ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/WriteTFRecord(/root/taxi/data/simple/eval_output/plots)/WriteToTFRecord/Write/WriteImpl/PreFinalize_93)+(ref_PCollection_PCollection_51/Write))
[2019-06-25 11:24:59,766] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_43/Read)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteResults/Wr

[2019-06-25 11:25:00,100] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_54/Read)+(ref_AppliedPTransform_ExtractEvaluateAndWriteResults/WriteEvalConfig(EvalConfig(model_location='/root/taxi/data/simple/trainer/current/eval_model_dir/1561461864', data_location='<user provided PCollection>', slice_spec=[SingleSliceSpec(columns=frozenset({'trip_start_hour'}), features=frozenset()), SingleSliceSpec(columns=frozenset(), features=frozenset())], example_weight_metric_key='post_export_metrics/example_count', num_bootstrap_samples=1))/WriteEvalConfig/Write/WriteImpl/FinalizeWrite_112)
[2019-06-25 11:25:00,114] {filebasedsink.py:290} INFO - Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
[2019-06-25 11:25:00,220] {filebasedsink.py:327} INFO - Renamed 1 shards in 0.10 seconds.
[2019-06-25 11:25:00,240] {fn_api_runner.py:437} INFO - Running (ref_AppliedPTransform_ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlot

[2019-06-25 11:25:01,230] {fn_api_runner.py:437} INFO - Running (((((((ref_AppliedPTransform_ReadData/Read_3)+((ref_AppliedPTransform_EvalCurrentModel/InputsToExtracts/Map(<lambda at model_eval_lib.py:393>)_6)+(ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/Predict/Batch/ParDo(_GlobalWindowsBatchingDoFn)_10)))+(((ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/Predict/Predict_11)+(ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/ExtractSliceKeys/ParDo(_ExtractSliceKeysFn)_13))+((ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/EvaluateMetricsAndPlots/Filter/Map(filter_extracts)_16)+(ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/DoSlicing_19))))+((ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/ExtractSliceKeys_21)+(ref_AppliedPTransform_EvalCurrentModel/ExtractAndEvaluate/EvaluateMetri

[2019-06-25 11:25:09,370] {fn_api_runner.py:437} INFO - Running (ref_PCollection_PCollection_54/Read)+(ref_AppliedPTransform_EvalCurrentModel/WriteEvalConfig(EvalConfig(model_location='/root/taxi/data/simple/trainer/current/eval_model_dir/1561461864', data_location='<user provided PCollection>', slice_spec=[SingleSliceSpec(columns=frozenset(), features=frozenset())], example_weight_metric_key='post_export_metrics/example_count', num_bootstrap_samples=1))/WriteEvalConfig/Write/WriteImpl/FinalizeWrite_112)
[2019-06-25 11:25:09,386] {filebasedsink.py:290} INFO - Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
[2019-06-25 11:25:09,489] {filebasedsink.py:327} INFO - Renamed 1 shards in 0.10 seconds.
[2019-06-25 11:25:09,498] {fn_api_runner.py:437} INFO - Running ((EvalCurrentModel/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots/FanoutSlices/TrackDistinctSliceKeys/RemoveDuplicates/RemoveDuplicates/Group/Group/Read)+((EvalCurrentMo

[2019-06-25 11:25:10,114] {base_executor.py:72} INFO - Starting Executor execution.
INFO:tensorflow:Inputs for Executor is: {"model_export": [{"artifact": {"properties": {"type_name": {"stringValue": "ModelExportPath"}, "split": {"stringValue": ""}}, "uri": "/root/taxi/data/simple/trainer/current/"}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "ModelExportPath"}}], "model_blessing": [{"artifact": {"properties": {"type_name": {"stringValue": "ModelBlessingPath"}, "split": {"stringValue": ""}}, "uri": "/root/taxi/data/simple/model_validator/blessed/", "customProperties": {"current_model_id": {"intValue": "0"}, "blessed": {"intValue": "1"}, "current_model": {"stringValue": "/root/taxi/data/simple/trainer/current/"}}}, "artifact_type": {"properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "span": "INT", "state": "STRING"}, "name": "ModelBlessingPath"}}]}
[2019-06-25 11:25:10,11

## Check Result

In [19]:
!ls -Rlhs /root/taxi/data/simple/

/root/taxi/data/simple/:
total 1.9M
4.0K drwxr-xr-x 4 root root 4.0K Jun 25 09:05 csv_example_gen
1.9M -rw-r--r-- 1 root root 1.9M Jun 25 09:05 data.csv
4.0K drwxr-xr-x 2 root root 4.0K Jun 25 09:08 eval_output
4.0K drwxr-xr-x 2 root root 4.0K Jun 25 09:05 schema_gen
4.0K drwxr-xr-x 4 root root 4.0K Jun 25 09:05 statistics_gen
4.0K drwxr-xr-x 3 root root 4.0K Jun 25 09:05 trainer
4.0K drwxr-xr-x 4 root root 4.0K Jun 25 09:05 transform

/root/taxi/data/simple/csv_example_gen:
total 8.0K
4.0K drwxr-xr-x 2 root root 4.0K Jun 25 09:05 eval
4.0K drwxr-xr-x 2 root root 4.0K Jun 25 09:05 train

/root/taxi/data/simple/csv_example_gen/eval:
total 204K
204K -rw-r--r-- 1 root root 201K Jun 25 09:05 data_tfrecord-00000-of-00001.gz

/root/taxi/data/simple/csv_example_gen/train:
total 408K
408K -rw-r--r-- 1 root root 405K Jun 25 09:05 data_tfrecord-00000-of-00001.gz

/root/taxi/data/simple/eval_output:
total 34M
4.0K -rw-r--r-- 1 root root  506 Jun 25 09:08 eval_config
 12K