In [1]:
try:
  import colab
  !pip install --upgrade pip
except:
  pass

In [2]:
# Install the `kaggle` package and copy the credentials file into ~/.kaggle.
# Expects kaggle.json to be present in the current working directory.
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Make the credentials file readable and writable by its owner only.
!chmod 600 ~/.kaggle/kaggle.json

**Note: In Google Colab, because of package updates, the first time you run this cell you must restart the runtime (Runtime > Restart runtime ...).**

In [None]:
# Install TFX pinned to 0.25.0; pip's verbose log is written to /tmp/pip.log.
!pip install -q -U -v --log /tmp/pip.log --use-feature=2020-resolver tfx==0.25.0

# Optional extra packages, currently disabled.
#!pip install -q papermill
#!pip install -q matplotlib
#!pip install -q networkx

In [3]:
import sys
import os
import pprint
import tempfile
import urllib
from typing import List, Text

import absl
import numpy as np
import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_model_analysis as tfma
import tensorflow_data_validation as tfdv
tf.get_logger().propagate = False
pp = pprint.PrettyPrinter()

import tfx
from tfx.components import CsvExampleGen
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import Pusher
from tfx.components import ResolverNode
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Transform
from tfx.components import Tuner
from tfx.components.trainer.executor import GenericExecutor
from tfx.dsl.components.base import executor_spec
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.local.local_dag_runner import LocalDagRunner
#from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

from tfx.proto import example_gen_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.proto.evaluator_pb2 import SingleSlicingSpec
from tfx.utils.dsl_utils import external_input
from tfx.types import Channel
from tfx.types.standard_artifacts import Model
from tfx.types.standard_artifacts import ModelBlessing

import ml_metadata as mlmd
from ml_metadata.proto import metadata_store_pb2

%load_ext tfx.orchestration.experimental.interactive.notebook_extensions.skip




### Pipeline paths setup

In [4]:
_pipeline_name = 'titanic_local'

# Resolve the home directory once. expanduser('~') uses $HOME when it is set
# and otherwise falls back to a platform lookup, so a missing $HOME no longer
# raises KeyError.
_home_dir = os.path.expanduser('~')

# This example assumes that the Titanic data is stored in ~/titanic/data and
# the utility module is in ~/titanic. Feel free to customize as needed.
_titanic_root = os.path.join(_home_dir, 'titanic')
_data_root = os.path.join(_titanic_root, 'data')
# Python module file to inject customized logic into the TFX components. The
# Transform, Trainer and Tuner all require user-defined functions to run
# successfully.
_module_file = os.path.join(_titanic_root, 'titanic_utils.py')
# Path which can be listened to by the model server.  Pusher will output the
# trained model here.
_serving_model_dir = os.path.join(_titanic_root, 'serving_model',
                                  _pipeline_name)

# Directory and data locations.  This example assumes all of the
# example code and metadata library is relative to the home directory, but you
# can store these files anywhere on your local filesystem.
_tfx_root = os.path.join(_home_dir, 'tfx')
_pipeline_root = os.path.join(_tfx_root, 'pipelines', _pipeline_name)
# Sqlite ML-metadata db path.
_metadata_path = os.path.join(_tfx_root, 'metadata', _pipeline_name,
                              'metadata.db')

# Pipeline arguments for Beam powered Components.
_beam_pipeline_args = [
    '--direct_running_mode=multi_processing',
    # 0 means auto-detect based on the number of CPUs available
    # during execution time.
    '--direct_num_workers=0',
]

# Set up logging.
absl.logging.set_verbosity(absl.logging.INFO)


## Load the Files
We will download the Titanic dataset from Kaggle.


In [5]:
import pathlib

# Each CSV gets its own sub-directory under _data_root. run_pipeline passes
# _train_dirpath as the pipeline's data root.
#_data_root = tempfile.mkdtemp(prefix='tfx-data')
_train_dirpath = os.path.join(_data_root, "train")
_train_filepath = os.path.join(_train_dirpath, "train.csv")
_test_dirpath = os.path.join(_data_root, "test")
_test_filepath = os.path.join(_test_dirpath, "test.csv")
os.makedirs(_train_dirpath, exist_ok=True)
os.makedirs(_test_dirpath,  exist_ok=True)

# Download the competition archive into _data_root (overwriting any previous
# download), unpack it in place, then copy the two CSVs into their
# sub-directories.
!kaggle competitions download -c titanic -p {_data_root} --force
!unzip -o {_data_root}/"titanic.zip" -d {_data_root}
!cp {_data_root}/"train.csv" {_train_filepath}
!cp {_data_root}/"test.csv" {_test_filepath}

Downloading titanic.zip to /home/jupyter/titanic/data
  0%|                                               | 0.00/34.1k [00:00<?, ?B/s]
100%|██████████████████████████████████████| 34.1k/34.1k [00:00<00:00, 24.2MB/s]
Archive:  /home/jupyter/titanic/data/titanic.zip
  inflating: /home/jupyter/titanic/data/gender_submission.csv  
  inflating: /home/jupyter/titanic/data/test.csv  
  inflating: /home/jupyter/titanic/data/train.csv  


In [6]:
# Sanity check: list the training CSV to confirm the copy above succeeded.
!ls {_train_filepath}

/home/jupyter/titanic/data/train/train.csv


### Create pipeline

In [23]:
_VOCAB_FEATURE_KEYS = [
   'Embarked', 'Pclass', 'Sex'
]

# Keys
_LABEL_KEY = 'Survived'
_FARE_KEY = 'Fare'
_CABIN_KEY = 'Cabin'
_PARCH_KEY = 'Parch'
_SIBSP_KEY = 'SibSp'

_ALL_DATA_SIZE = 891
train_ratio_percent=0.8
_TRAIN_DATA_SIZE = int(_ALL_DATA_SIZE * train_ratio_percent)
_EVAL_DATA_SIZE = _ALL_DATA_SIZE - _TRAIN_DATA_SIZE
_TRAIN_BATCH_SIZE = 64
_EVAL_BATCH_SIZE = 64

def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text, enable_tuning: bool,
                     beam_pipeline_args: List[Text],
                     train_ratio_percent: float=0.8) -> pipeline.Pipeline:
  """Implements the titanic pipeline with TFX.

  Args:
    pipeline_name: Name given to the resulting pipeline.
    pipeline_root: Root directory handed to the pipeline.
    data_root: Directory with the input CSV data read by CsvExampleGen.
    module_file: Python module with the user code used by Transform, Tuner
      and Trainer.
    serving_model_dir: Base directory into which Pusher writes the model.
    metadata_path: Path of the SQLite ML-Metadata database.
    enable_tuning: When True, a Tuner component is added to the pipeline and
      its `best_hyperparameters` output is fed to Trainer.
    beam_pipeline_args: Arguments passed to the pipeline for its Beam-powered
      components.
    train_ratio_percent: Fraction of the examples (e.g. 0.8, despite the name)
      assigned to the 'train' split; the remainder goes to 'eval'.

  Returns:
    The assembled `pipeline.Pipeline`.

  Raises:
    ValueError: If `train_ratio_percent` does not leave at least one of the
      100 hash buckets to each split.
  """
  # round() rather than a bare int(): float noise such as
  # 0.29 * 100 == 28.999999999999996 would otherwise be truncated to 28.
  train_ratio = int(round(train_ratio_percent * 100))
  eval_ratio = 100 - train_ratio
  if not 0 < train_ratio < 100:
    raise ValueError(
        'train_ratio_percent must be a fraction strictly between 0 and 1 '
        'that leaves at least 1%% for each split, got %r' %
        (train_ratio_percent,))

  output = example_gen_pb2.Output(
             split_config=example_gen_pb2.SplitConfig(splits=[
                 example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=train_ratio),
                 example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=eval_ratio)
             ]))
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples, output_config=output)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'], infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Tunes the hyperparameters for model training based on the user-provided
  # tuner_fn in module_file. Note that once the hyperparameters are tuned, you
  # can drop the Tuner component from the pipeline and feed Trainer with the
  # tuned hyperparameters.
  if enable_tuning:
    tuner = Tuner(
        module_file=module_file,
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        train_args=trainer_pb2.TrainArgs(num_steps=500),
        eval_args=trainer_pb2.EvalArgs(num_steps=100))

  # Uses user-provided Python function that trains a model.
  trainer = Trainer(
      module_file=module_file,
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      # If Tuner is in the pipeline, Trainer can take Tuner's output
      # best_hyperparameters artifact as input and utilize it in the user module
      # code.
      #
      # If there isn't Tuner in the pipeline, either use ImporterNode to import
      # a previous Tuner's output to feed to Trainer, or directly use the tuned
      # hyperparameters in user module code and set hyperparameters to None
      # here.
      #
      # Example of ImporterNode,
      #   hparams_importer = ImporterNode(
      #     instance_name='import_hparams',
      #     source_uri='path/to/best_hyperparameters.txt',
      #     artifact_type=HyperParameters)
      #   ...
      #   hyperparameters = hparams_importer.outputs['result'],
      hyperparameters=(tuner.outputs['best_hyperparameters']
                       if enable_tuning else None),
      train_args=trainer_pb2.TrainArgs(num_steps=3000),
      eval_args=trainer_pb2.EvalArgs(num_steps=500))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
    model_specs=[
        # Using signature 'eval' implies the use of an EvalSavedModel. Here
        # the serving model is evaluated instead, which requires the
        # 'serving_default' signature together with a label_key.
        tfma.ModelSpec(signature_name='serving_default',
                       label_key=_LABEL_KEY)
    ],
    metrics_specs=[
        tfma.MetricsSpec(
            # The metrics added here are in addition to those saved with the
            # model (assuming either a keras model or EvalSavedModel is used).
            # Any metrics added into the saved model (for example using
            # model.compile(..., metrics=[...]), etc) will be computed
            # automatically.
            metrics=[
                tfma.MetricConfig(class_name='ExampleCount')
            ],
            # To add validation thresholds for metrics saved with the model,
            # add them keyed by metric name to the thresholds map.
            thresholds = {
                'accuracy': tfma.MetricThreshold(
                    value_threshold=tfma.GenericValueThreshold(
                        lower_bound={'value': 0.5}),
                    change_threshold=tfma.GenericChangeThreshold(
                       direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                       absolute={'value': -1e-10}))
            }
        )
    ],
    slicing_specs=[
        # An empty slice spec means the overall slice, i.e. the whole dataset.
        tfma.SlicingSpec(),
        # Data can be sliced along a feature column; one spec per column.
        tfma.SlicingSpec(feature_keys=['Sex']),
        tfma.SlicingSpec(feature_keys=['Age']),
        tfma.SlicingSpec(feature_keys=['Age_xf']),
        tfma.SlicingSpec(feature_keys=['Fare']),
        tfma.SlicingSpec(feature_keys=['Parch']),
        tfma.SlicingSpec(feature_keys=['Parch_xf']),
        tfma.SlicingSpec(feature_keys=['SibSp']),
        tfma.SlicingSpec(feature_keys=['SibSp_xf']),
    ])

  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config)

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  components = [
      example_gen,
      statistics_gen,
      schema_gen,
      example_validator,
      transform,
      trainer,
      model_resolver,
      evaluator,
      pusher,
  ]
  if enable_tuning:
    components.append(tuner)

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components,
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)

def run_pipeline(enable_tuning=False):
  """Builds the Titanic pipeline from the notebook settings and runs it locally.

  Args:
    enable_tuning: Forwarded to `_create_pipeline`; when True the pipeline
      includes the Tuner component.
  """
  absl.logging.set_verbosity(absl.logging.INFO)

  # All settings come from the notebook-level configuration variables.
  pipeline_settings = dict(
      pipeline_name=_pipeline_name,
      pipeline_root=_pipeline_root,
      data_root=_train_dirpath,
      module_file=_module_file,
      serving_model_dir=_serving_model_dir,
      metadata_path=_metadata_path,
      enable_tuning=enable_tuning,
      beam_pipeline_args=_beam_pipeline_args,
      train_ratio_percent=0.8,
  )
  titanic_pipeline = _create_pipeline(**pipeline_settings)

  runner = LocalDagRunner()
  runner.run(titanic_pipeline)
  


### Example feature code 

In [24]:
from typing import List, Text
import absl
import kerastuner
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_transform as tft
import tensorflow_model_analysis as tfma
from tensorflow_transform.tf_metadata import schema_utils

from tfx.components.trainer.executor import TrainerFnArgs
from tfx.components.trainer.fn_args_utils import DataAccessor
from tfx.components.trainer.fn_args_utils import FnArgs
from tfx.components.tuner.component import TunerFnResult
from tfx_bsl.tfxio import dataset_options


_DROPPPED_FEATURES = [
  'PassengerId', 'Name'
]

# Categorical features are assumed to each have a maximum value in the dataset.
_MAX_CATEGORICAL_FEATURE_VALUES = [10, 10, 10]

_CATEGORICAL_FEATURE_KEYS = [
    #'Embarked', 'Pclass', 'Sex'
]

_DENSE_FLOAT_FEATURE_KEYS = [
  'Age', 'Fare'
]

# Number of buckets used by tf.transform for encoding each feature.
_FEATURE_BUCKET_COUNT = 10
_FEATURE_BUCKET_COUNT_MAP = {'Parch': 3, 'SibSp': 3}
_FEATURE_BUCKET_BOUNDARIES = {'Parch': [[0.0, 1.0, 2.0]], 'SibSp': [[0.0, 1.0, 2.0]]}

_BUCKET_FEATURE_KEYS = [
    'Parch', 'SibSp'
]

# Number of vocabulary terms used for encoding VOCAB_FEATURES by tf.transform
_VOCAB_SIZE = 1000
_VOCAB_SIZE_MAP = {'Embarked': 3, 'Pclass': 3, 'Sex':2}

# Count of out-of-vocab buckets in which unrecognized VOCAB_FEATURES are hashed.
_OOV_SIZE = 10

_VOCAB_FEATURE_KEYS = [
   'Embarked', 'Pclass', 'Sex'
]

# Keys
_LABEL_KEY = 'Survived'
_FARE_KEY = 'Fare'
_CABIN_KEY = 'Cabin'
_PARCH_KEY = 'Parch'
_SIBSP_KEY = 'SibSp'

_ALL_DATA_SIZE = 891
train_ratio_percent=0.8
_TRAIN_DATA_SIZE = int(_ALL_DATA_SIZE * train_ratio_percent)
_EVAL_DATA_SIZE = _ALL_DATA_SIZE - _TRAIN_DATA_SIZE
_TRAIN_BATCH_SIZE = 128
_EVAL_BATCH_SIZE = 128


real_keys =  _DENSE_FLOAT_FEATURE_KEYS
sparse_keys = _VOCAB_FEATURE_KEYS + _BUCKET_FEATURE_KEYS + _CATEGORICAL_FEATURE_KEYS 

print(real_keys)
print(sparse_keys)

['Age', 'Fare']
['Embarked', 'Pclass', 'Sex', 'Parch', 'SibSp']


### Create module file for components 

In [25]:
%%writefile {_module_file}

from typing import List, Text
import absl
import kerastuner
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_transform as tft
import tensorflow_model_analysis as tfma
from tensorflow_transform.tf_metadata import schema_utils

from tfx.components.trainer.executor import TrainerFnArgs
from tfx.components.trainer.fn_args_utils import DataAccessor
from tfx.components.trainer.fn_args_utils import FnArgs
from tfx.components.tuner.component import TunerFnResult
from tfx_bsl.tfxio import dataset_options


_DROPPPED_FEATURES = [
  'PassengerId', 'Name'
]

# Categorical features are assumed to each have a maximum value in the dataset.
_MAX_CATEGORICAL_FEATURE_VALUES = [10, 10, 10]

_CATEGORICAL_FEATURE_KEYS = [
    #'Embarked', 'Pclass', 'Sex'
]

_DENSE_FLOAT_FEATURE_KEYS = [
  'Age', 'Fare'
]

# Number of buckets used by tf.transform for encoding each feature.
_FEATURE_BUCKET_COUNT = 10
_FEATURE_BUCKET_COUNT_MAP = {'Parch': 3, 'SibSp': 3}
_FEATURE_BUCKET_BOUNDARIES = {'Parch': [[0.0, 1.0, 2.0]], 'SibSp': [[0.0, 1.0, 2.0]]}

_BUCKET_FEATURE_KEYS = [
    'Parch', 'SibSp'
]

# Number of vocabulary terms used for encoding VOCAB_FEATURES by tf.transform
_VOCAB_SIZE = 1000
_VOCAB_SIZE_MAP = {'Embarked': 3, 'Pclass': 3, 'Sex':2}

# Count of out-of-vocab buckets in which unrecognized VOCAB_FEATURES are hashed.
_OOV_SIZE = 10

_VOCAB_FEATURE_KEYS = [
   'Embarked', 'Pclass', 'Sex'
]

# Keys
_LABEL_KEY = 'Survived'
_FARE_KEY = 'Fare'
_CABIN_KEY = 'Cabin'
_PARCH_KEY = 'Parch'
_SIBSP_KEY = 'SibSp'

_ALL_DATA_SIZE = 891
train_ratio_percent=0.8
_TRAIN_DATA_SIZE = int(_ALL_DATA_SIZE * train_ratio_percent)
_EVAL_DATA_SIZE = _ALL_DATA_SIZE - _TRAIN_DATA_SIZE
_TRAIN_BATCH_SIZE = 64
_EVAL_BATCH_SIZE = 64


def _transformed_name(key):
  return key + '_xf'

def _get_bucket_count(key):
  if key in _FEATURE_BUCKET_BOUNDARIES:
    return np.shape(_FEATURE_BUCKET_BOUNDARIES.get(key))[1]
  if key in _FEATURE_BUCKET_COUNT_MAP:
    return _FEATURE_BUCKET_COUNT_MAP.get(key)
  return _FEATURE_BUCKET_COUNT    


def _transformed_names(keys):
  return [_transformed_name(key) for key in keys]


def _fill_in_missing(x):
  """Densifies a sparse feature column, filling the gaps with a default.

  Args:
    x: A `SparseTensor` of rank 2.  Its dense shape should have size at most 1
      in the second dimension.

  Returns:
    A rank 1 tensor in which missing values of `x` are '' for string tensors
    and 0 for every other dtype.
  """
  fill_value = '' if x.dtype == tf.string else 0
  # Rebuild the sparse tensor with exactly one column so that squeezing the
  # second axis afterwards is always valid.
  single_column = tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1])
  dense_column = tf.sparse.to_dense(single_column, fill_value)
  return tf.squeeze(dense_column, axis=1)
  
def preprocessing_fn(inputs):
  """tf.transform's callback function for preprocessing inputs.

  Each output is stored under the raw feature key with the '_xf' suffix.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  outputs = {}

  for key in _DENSE_FLOAT_FEATURE_KEYS:
    # Scale to z-score. Note that missing values are filled with 0 *before*
    # scaling, so they are not replaced by the feature mean.
    outputs[_transformed_name(key)] = tft.scale_to_z_score(
        _fill_in_missing(inputs[key]))

  for key in _VOCAB_FEATURE_KEYS:
    # Build a vocabulary for this feature and map each value to its index.
    outputs[_transformed_name(key)] = tft.compute_and_apply_vocabulary(
        _fill_in_missing(inputs[key]),
        top_k=_VOCAB_SIZE_MAP.get(key, _VOCAB_SIZE),
        num_oov_buckets=_OOV_SIZE)

  for key in _BUCKET_FEATURE_KEYS:
    if key in _FEATURE_BUCKET_BOUNDARIES:
      # Explicit boundaries configured for this feature.
      bucket_boundaries = tf.constant(_FEATURE_BUCKET_BOUNDARIES.get(key))
      outputs[_transformed_name(key)] = tft.apply_buckets(
          _fill_in_missing(inputs[key]), bucket_boundaries)
    else:
      # No explicit boundaries: let tf.transform compute them for the
      # configured number of buckets.
      outputs[_transformed_name(key)] = tft.bucketize(
          _fill_in_missing(inputs[key]),
          _FEATURE_BUCKET_COUNT_MAP.get(key, _FEATURE_BUCKET_COUNT))

  for key in _CATEGORICAL_FEATURE_KEYS:
    # Passed through unchanged apart from filling in missing values.
    outputs[_transformed_name(key)] = _fill_in_missing(inputs[key])

  # The label is only densified.
  outputs[_transformed_name(_LABEL_KEY)] = _fill_in_missing(inputs[_LABEL_KEY])

  return outputs


def _get_raw_feature_spec(schema):
  """Returns the feature spec derived from `schema`.

  Tf.Transform considers these features as "raw".
  """
  parsed_schema = schema_utils.schema_as_feature_spec(schema)
  return parsed_schema.feature_spec

def _get_serve_tf_examples_fn(model, tf_transform_output):
  """Builds the function that serves predictions for serialized tf.Examples.

  Args:
    model: The Keras model to call on the transformed features. The transform
      layer is stored on it as `model.tft_layer`.
    tf_transform_output: A TFTransformOutput providing the raw feature spec
      and the transform layer.

  Returns:
    A `tf.function` taking a batch of serialized tf.Examples and returning
    the model output.
  """
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function
  def serve_tf_examples_fn(serialized_tf_examples):
    """Returns the output to be used in the serving signature."""
    # The label is not part of a serving request, so it is removed from the
    # parsing spec.
    raw_spec = tf_transform_output.raw_feature_spec()
    raw_spec.pop(_LABEL_KEY)

    raw_features = tf.io.parse_example(serialized_tf_examples, raw_spec)
    return model(model.tft_layer(raw_features))

  return serve_tf_examples_fn

def _example_serving_receiver_fn(tf_transform_graph, schema):
  """Builds the serving inputs.

  Args:
    tf_transform_graph: A TFTransformOutput.
    schema: the schema of the input data.

  Returns:
    A ServingInputReceiver whose features are the tf-transformed parsed
    examples and whose receiver tensors are those of the parsing receiver.
  """
  # The label is not supplied at serving time.
  serving_spec = _get_raw_feature_spec(schema)
  serving_spec.pop(_LABEL_KEY)

  build_receiver = tf.estimator.export.build_parsing_serving_input_receiver_fn(
      serving_spec, default_batch_size=None)
  parsing_receiver = build_receiver()

  features_xf = tf_transform_graph.transform_raw_features(
      parsing_receiver.features)

  return tf.estimator.export.ServingInputReceiver(
      features_xf, parsing_receiver.receiver_tensors)


def _eval_input_receiver_fn(tf_transform_graph, schema):
  """Builds everything needed for tf-model-analysis to run the model.

  Args:
    tf_transform_graph: A TFTransformOutput.
    schema: the schema of the input data.

  Returns:
    An EvalInputReceiver containing:
      - features: the raw, untransformed features merged with the
        tf-transformed ones.
      - receiver_tensors: the serialized-example placeholder, keyed by
        'examples'.
      - labels: the transformed label feature.
  """
  # The inputs here are raw features, not transformed ones.
  raw_spec = _get_raw_feature_spec(schema)

  examples_placeholder = tf.compat.v1.placeholder(
      dtype=tf.string, shape=[None], name='input_example_tensor')

  # Parse the raw, untransformed tf.Examples, then run them through the
  # tf-transform function computed during the preprocessing step.
  raw_features = tf.io.parse_example(examples_placeholder, raw_spec)
  features_xf = tf_transform_graph.transform_raw_features(raw_features)

  # NOTE: The model is driven by transformed features (training works on the
  # materialized output of TFT), but slicing happens on raw features, so both
  # sets are exposed together.
  all_features = {**raw_features, **features_xf}

  return tfma.export.EvalInputReceiver(
      features=all_features,
      # The key name MUST be 'examples'.
      receiver_tensors={'examples': examples_placeholder},
      labels=features_xf[_transformed_name(_LABEL_KEY)])


def _input_fn(file_pattern, data_accessor, tf_transform_output, batch_size=_TRAIN_BATCH_SIZE):
  """Generates features and label for tuning/training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    tf_transform_output: A TFTransformOutput.
    batch_size: representing the number of consecutive elements of returned
      dataset to combine in a single batch

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  # Batches are labelled with the transformed label feature.
  options = dataset_options.TensorFlowDatasetOptions(
      batch_size=batch_size, label_key=_transformed_name(_LABEL_KEY))
  transformed_schema = tf_transform_output.transformed_metadata.schema

  return data_accessor.tf_dataset_factory(
      file_pattern, options, transformed_schema)

def _get_hyperparameters() -> kerastuner.HyperParameters:
  """Returns the search space and default hyperparameters for the Keras model."""
  # (name, candidate values, default) for every categorical choice.
  choice_specs = (
      ('learning_rate', [1e-2, 3e-3, 1e-3], 1e-2),
      ('first_dnn_layer_size', [128, 96, 64, 32], 128),
      ('dnn_decay_factor', [0.3, 0.4, 0.5, 0.6, 0.7], 0.5),
  )

  search_space = kerastuner.HyperParameters()
  for name, candidates, default in choice_specs:
    search_space.Choice(name, candidates, default=default)
  search_space.Int('num_dnn_layers', 1, 3, default=2)
  return search_space

def wide_and_deep_classifier(inputs, linear_feature_columns, dnn_feature_columns, dnn_hidden_units, optimizer):
  """Builds and compiles a wide-and-deep binary classifier.

  Args:
    inputs: Keras inputs fed to both the wide and the deep feature layers.
    linear_feature_columns: Feature columns of the wide branch.
    dnn_feature_columns: Feature columns of the deep branch.
    dnn_hidden_units: Iterable with the unit count of each hidden layer of
      the deep branch, in order.
    optimizer: Optimizer the model is compiled with.

  Returns:
    A compiled `tf.keras.Model` with a single sigmoid output.
  """
  keras_layers = tf.keras.layers

  # Deep branch: dense features followed by a stack of ReLU layers.
  deep_branch = keras_layers.DenseFeatures(
      dnn_feature_columns, name='deep_inputs')(inputs)
  for depth, units in enumerate(dnn_hidden_units, start=1):
    deep_branch = keras_layers.Dense(
        units, activation='relu', name='dnn_{}'.format(depth))(deep_branch)

  # Wide branch: dense features fed directly to the output layer.
  wide_branch = keras_layers.DenseFeatures(
      linear_feature_columns, name='wide_inputs')(inputs)

  merged = keras_layers.concatenate([deep_branch, wide_branch], name='both')
  prediction = keras_layers.Dense(1, activation='sigmoid', name='pred')(merged)

  classifier = tf.keras.Model(inputs, prediction)
  classifier.compile(
      optimizer=optimizer,
      loss='binary_crossentropy',
      metrics=['accuracy'])
  return classifier

def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
  """Creates a wide-and-deep Keras model for classifying Titanic data.

  Args:
    hparams: Holds HyperParameters for tuning. The entries read are
      'first_dnn_layer_size', 'dnn_decay_factor', 'num_dnn_layers' and
      'learning_rate'.

  Returns:
    A compiled Keras Model.
  """
  # The model below is built with Functional API, please refer to
  # https://www.tensorflow.org/guide/keras/overview for all API options.

  real_keys = _DENSE_FLOAT_FEATURE_KEYS
  sparse_keys = _VOCAB_FEATURE_KEYS + _BUCKET_FEATURE_KEYS + _CATEGORICAL_FEATURE_KEYS

  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in _transformed_names(_DENSE_FLOAT_FEATURE_KEYS)
  ]

  # Vocabulary features: size each identity column from the same per-feature
  # vocabulary size that preprocessing_fn passes as top_k, plus the OOV
  # buckets, instead of always reserving _VOCAB_SIZE + _OOV_SIZE ids.
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          _transformed_name(key),
          num_buckets=_VOCAB_SIZE_MAP.get(key, _VOCAB_SIZE) + _OOV_SIZE,
          default_value=0)
      for key in _VOCAB_FEATURE_KEYS
  ]

  # Bucketized features.
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          _transformed_name(key), num_buckets=_get_bucket_count(key), default_value=0)
      for key in _BUCKET_FEATURE_KEYS
  ]

  # Pass-through categorical features, paired with their assumed maximum value.
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(  # pylint: disable=g-complex-comprehension
          key,
          num_buckets=num_buckets,
          default_value=0) for key, num_buckets in zip(
              _transformed_names(_CATEGORICAL_FEATURE_KEYS),
              _MAX_CATEGORICAL_FEATURE_VALUES)
  ]

  indicator_columns = [
      tf.feature_column.indicator_column(categorical_column)
      for categorical_column in categorical_columns
  ]

  first_dnn_layer_size = int(hparams.get('first_dnn_layer_size'))
  dnn_decay_factor = float(hparams.get('dnn_decay_factor'))
  num_dnn_layers = int(hparams.get('num_dnn_layers'))
  learning_rate = float(hparams.get('learning_rate'))

  # Layer sizes shrink geometrically by dnn_decay_factor, never below 2 units.
  hidden_units = [
      max(2, int(first_dnn_layer_size * dnn_decay_factor**i))
      for i in range(num_dnn_layers)
  ]
  optimizer = keras.optimizers.Adam(learning_rate)

  # One scalar Keras input per feature, keyed by the transformed feature name.
  inputs = {
      _transformed_name(colname): tf.keras.layers.Input(
          name=colname, shape=(), dtype='float32')
      for colname in real_keys
  }
  inputs.update({
      _transformed_name(colname): tf.keras.layers.Input(
          name=colname, shape=(), dtype='int32')
      for colname in sparse_keys
  })

  model = wide_and_deep_classifier(inputs=inputs,
                                   linear_feature_columns=real_valued_columns,
                                   dnn_feature_columns=indicator_columns,
                                   dnn_hidden_units=hidden_units,
                                   optimizer=optimizer)

  model.summary(print_fn=absl.logging.info)
  return model

# TFX Trainer will call this function.
def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Builds the train/eval datasets, trains the Keras model with TensorBoard
  logging and early stopping, and saves it with a 'serving_default' signature
  that accepts serialized tf.Examples.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  # Imported locally because this module file does not import `os` at the
  # top; the log-directory fallback below would otherwise raise NameError.
  import os

  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_EVAL_BATCH_SIZE)

  if fn_args.hyperparameters:
    hparams = kerastuner.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # This is a shown case when hyperparameters is decided and Tuner is removed
    # from the pipeline. User can also inline the hyperparameters directly in
    # _build_keras_model.
    hparams = _get_hyperparameters()
  absl.logging.info('HyperParameters for training: %s', hparams.get_config())

  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model(hparams)

  # A missing model_run_dir surfaces as KeyError on dict-backed args objects
  # and as AttributeError on attribute-style ones; handle both.
  try:
    log_dir = fn_args.model_run_dir
  except (AttributeError, KeyError):
    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')

  # Write logs to path
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir, update_freq='batch')
  early_stopping_callback = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss', patience=15)

  # One epoch is one pass over the training split, e.g. 712 // 64 -> 11 steps.
  # Clamped to 1 so a batch size larger than the split cannot produce zero
  # steps (and a division by zero below).
  steps_per_epoch = max(1, _TRAIN_DATA_SIZE // _TRAIN_BATCH_SIZE)

  # Do not specify the batch_size if your data is in the form of datasets,
  # generators, or keras.utils.Sequence instances (since they generate batches).
  model.fit(
      train_dataset,
      # Spread the requested number of training steps over whole epochs.
      epochs=fn_args.train_steps // steps_per_epoch,
      steps_per_epoch=steps_per_epoch,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback, early_stopping_callback])

  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(model,
                                    tf_transform_output).get_concrete_function(
                                        tf.TensorSpec(
                                            shape=[None],
                                            dtype=tf.string,
                                            name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

# TFX Tuner will call this function.
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
  """Assemble the KerasTuner search and the fit arguments for the TFX Tuner.

  Args:
    fn_args: Holds args as name/value pairs. The fields read here are:
      - working_dir: directory handed to the tuner as its `directory`.
      - train_files: List of file paths containing training tf.Example data.
      - eval_files: List of file paths containing eval tf.Example data.
      - data_accessor: forwarded to _input_fn together with the file lists.
      - train_steps: passed on as `steps_per_epoch`.
      - eval_steps: passed on as `validation_steps`.
      - transform_graph_path: transform graph produced by TFT.

  Returns:
    A TunerFnResult holding:
      - tuner: the RandomSearch tuner that will be used for tuning.
      - fit_kwargs: args to pass to the tuner's run_trial function for fitting
                    the model (training/validation datasets and step counts).
  """
  # The search space is exactly what _get_hyperparameters() declares: with
  # allow_new_entries=False the model builder cannot register extra entries.
  search_space = _get_hyperparameters()
  search_objective = kerastuner.Objective('val_accuracy', 'max')

  # RandomSearch derives from kerastuner.Tuner, which in turn derives from
  # BaseTuner.
  tuner = kerastuner.RandomSearch(
      _build_keras_model,
      max_trials=10,
      hyperparameters=search_space,
      allow_new_entries=False,
      objective=search_objective,
      directory=fn_args.working_dir,
      project_name='titanic_tuning')

  tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

  def _dataset_for(file_patterns, batch_size):
    # Both splits share the data accessor and the transform output; only the
    # files and the batch size differ.
    return _input_fn(
        file_patterns,
        fn_args.data_accessor,
        tf_transform_output,
        batch_size=batch_size)

  train_dataset = _dataset_for(fn_args.train_files, _TRAIN_BATCH_SIZE)
  eval_dataset = _dataset_for(fn_args.eval_files, _EVAL_BATCH_SIZE)

  fit_kwargs = {
      'x': train_dataset,
      'validation_data': eval_dataset,
      'steps_per_epoch': fn_args.train_steps,
      'validation_steps': fn_args.eval_steps
  }
  return TunerFnResult(tuner=tuner, fit_kwargs=fit_kwargs)

Overwriting /home/jupyter/titanic/titanic_utils.py


### Run the pipeline

In [None]:
run_pipeline(enable_tuning=True)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Component CsvExampleGen is running.
INFO:absl:Running driver for CsvExampleGen
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:select span and version = (0, None)
INFO:absl:latest span and version = (0, None)
INFO:absl:Running publisher for CsvExampleGen
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component CsvExampleGen is finished.
INFO:absl:Component ResolverNode.latest_blessed_model_resolver is running.
INFO:absl:Running driver for ResolverNode.latest_blessed_model_resolver
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running publisher for ResolverNode.latest_blessed_model_resolver
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component ResolverNode.latest_blessed_model_resolver is finished.
INFO:absl:Com





INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
'Counter' object has no attribute 'name'
INFO:tensorflow:SavedModel written to: /home/jupyter/tfx/pipelines/titanic_local/Transform/transform_graph/26/.temp_path/tftransform_tmp/6d1ceaa5773e48c797734c1ed0b7be78/saved_model.pb
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
'Counter' object has no attribute 'name'
INFO:tensorflow:SavedModel written to: /home/jupyter/tfx/pipelines/titanic_local/Transform/transform_graph/26/.temp_path/tftransform_tmp/a848ae22a6a446bc8a668b14df042e89/saved_model.pb


INFO:absl:Feature Name has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Embarked has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Sex has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Ticket has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Age has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Cabin has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Fare has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Parch has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature PassengerId has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Pclass has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature SibSp has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Survived has no shape. Setting to VarLenSparseTensor.




INFO:absl:Feature Name has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Embarked has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Sex has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Ticket has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Age has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Cabin has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Fare has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Parch has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature PassengerId has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Pclass has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature SibSp has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature Survived has no shape. Setting to VarLenSparseTensor.




INFO:absl:Running publisher for Transform
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Transform is finished.
INFO:absl:Component Tuner is running.
INFO:absl:Running driver for Tuner
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for Tuner
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpaobx5o6y/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpaobx5o6y/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpaobx5o6y/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpaobx5o6y/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Creating temp directory at /home/jupyter/tfx/pipelines/titanic_local/.temp/27/
INFO:absl:Train on the 'train' split when train_args.splits is not set.
INFO:absl:Evaluate on the 'eval' split when eval_args.splits i

INFO:absl:Start tuning... Tuner ID: tuner0
INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]  



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:absl:Model: "functional_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                



INFO:tensorflow:Oracle triggered exit


INFO:absl:Finished tuning... Tuner ID: tuner0


INFO:absl:Best HyperParameters: {'space': [{'class_name': 'Choice', 'config': {'name': 'learning_rate', 'default': 0.01, 'values': [0.01, 0.003, 0.001], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'first_dnn_layer_size', 'default': 128, 'values': [128, 96, 64, 32], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'dnn_decay_factor', 'default': 0.5, 'values': [0.3, 0.4, 0.5, 0.6, 0.7], 'ordered': True}}, {'class_name': 'Int', 'config': {'name': 'num_dnn_layers', 'default': 2, 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': None}}], 'values': {'learning_rate': 0.001, 'first_dnn_layer_size': 64, 'dnn_decay_factor': 0.7, 'num_dnn_layers': 1}}
INFO:absl:Best Hyperparameters are written to /home/jupyter/tfx/pipelines/titanic_local/Tuner/best_hyperparameters/27/best_hyperparameters.txt.
INFO:absl:Running publisher for Tuner
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Tuner is finished.
INFO:absl:Component Trainer is runnin

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


INFO:absl:Model: "functional_3"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Age (InputLayer)                [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Embarked (InputLayer)           [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Fare (InputLayer)               [(None,)]            0                                            
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Parch (InputLayer)              [(None,)]            0                                

Epoch 1/272
Epoch 2/272
Epoch 3/272
Epoch 4/272
Epoch 5/272
Epoch 6/272
Epoch 7/272
Epoch 8/272
Epoch 9/272
Epoch 10/272
Epoch 11/272
Epoch 12/272
Epoch 13/272
Epoch 14/272
Epoch 15/272
Epoch 16/272
Epoch 17/272
Epoch 18/272
Epoch 19/272
Epoch 20/272
Epoch 21/272
Epoch 22/272
Epoch 23/272
Epoch 24/272
Epoch 25/272
Epoch 26/272
Epoch 27/272
Epoch 28/272
Epoch 29/272
Epoch 30/272
Epoch 31/272
Epoch 32/272
Epoch 33/272
Epoch 34/272
Epoch 35/272
Epoch 36/272
Epoch 37/272
Epoch 38/272
Epoch 39/272
Epoch 40/272
Epoch 41/272
Epoch 42/272
Epoch 43/272
Epoch 44/272
Epoch 45/272
Epoch 46/272
Epoch 47/272
Epoch 48/272
Epoch 49/272
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Assets written to: /home/jupyter/tfx/pipelines/titanic_local/Trainer/model/28/serving_model_dir/assets


INFO:absl:Training complete. Model written to /home/jupyter/tfx/pipelines/titanic_local/Trainer/model/28/serving_model_dir. ModelRun written to /home/jupyter/tfx/pipelines/titanic_local/Trainer/model_run/28
INFO:absl:Running publisher for Trainer
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Trainer is finished.
INFO:absl:Component Evaluator is running.
INFO:absl:Running driver for Evaluator
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for Evaluator
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpq4luil0w/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpq4luil0w/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpq4luil0w/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpq4luil0w/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
        



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.




### Write tfx_utils.py
(taken from [https://github.com/tensorflow/tfx/blob/master/tfx/examples/airflow_workshop/notebooks/tfx_utils.py](https://github.com/tensorflow/tfx/blob/master/tfx/examples/airflow_workshop/notebooks/tfx_utils.py))

### Write utils.py
(taken from [https://github.com/tensorflow/tfx/blob/master/tfx/examples/airflow_workshop/notebooks/utils.py](https://github.com/tensorflow/tfx/blob/master/tfx/examples/airflow_workshop/notebooks/utils.py))

### Analyze outputs after pipeline is run

In [None]:
import os
import tfx_utils

def get_metadata_store():
    """Build a `tfx_utils.TFXReadonlyMetadataStore` from the SQLite DB at `_metadata_path`.

    `_metadata_path` is not defined in this cell; it must already exist in the
    notebook's namespace when this function is called.
    """
    store_cls = tfx_utils.TFXReadonlyMetadataStore
    return store_cls.from_sqlite_db(_metadata_path)

store = get_metadata_store()

store.get_artifacts_of_type_df(tfx_utils.TFXArtifactTypes.MODEL)


In [None]:
!ls -al {_pipeline_root}/Trainer/model

In [18]:
!ls -al {_pipeline_root}/.temp/7/titanic_tuning

total 28
drwxr-xr-x 5 jupyter jupyter 4096 Jan  4 21:12 .
drwxr-xr-x 3 jupyter jupyter 4096 Jan  4 21:11 ..
-rw-r--r-- 1 jupyter jupyter  857 Jan  4 21:12 oracle.json
drwxr-xr-x 3 jupyter jupyter 4096 Jan  4 21:12 trial_887b3413f8a9c0e7e43afdb77bb54179
drwxr-xr-x 3 jupyter jupyter 4096 Jan  4 21:11 trial_e02d9066eca4a937c325c25f0f97c35d
drwxr-xr-x 3 jupyter jupyter 4096 Jan  4 21:12 trial_e1ed5f5ff0148d539ddc3385a3a9b6bc
-rw-r--r-- 1 jupyter jupyter    2 Jan  4 21:12 tuner0.json


In [None]:
!ls -al {_pipeline_root}/Trainer/model/27/eval_model_dir

In [19]:
# Alternative log location under the Trainer component, kept for reference:
#tensorboard_logdir = os.path.join(_pipeline_root, 'Trainer/model_run')
# Point TensorBoard at the pipeline's `.temp` directory instead.
# NOTE(review): presumably chosen because the tuner's trial directories live
# under `.temp/<run id>/titanic_tuning` — confirm this is the intended view.
tensorboard_logdir = os.path.join(_pipeline_root, '.temp')

print('tensorboard_logdir: {}'.format(tensorboard_logdir))
# Also expose the chosen directory through an environment variable.
os.environ['TENSORBOARD_LOGDIR'] = tensorboard_logdir

# Re-running this cell makes `%load_ext` print an "already loaded" notice that
# suggests `%reload_ext tensorboard`; TensorBoard itself is still (re)used.
%load_ext tensorboard
%tensorboard --logdir {tensorboard_logdir}

tensorboard_logdir: /home/jupyter/tfx/pipelines/titanic_local/.temp
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 9585), started 0:02:28 ago. (Use '!kill 9585' to kill it.)

In [None]:
model_run_dir = trainer.outputs['model_run'].get()[0].uri