In [1]:
# Uncomment on a fresh runtime to install TFX into the kernel's environment.
# %pip install tfx

In [14]:
import os

from absl import logging

# Name of the pipeline; also a path component of every output location below.
_PIPELINE_NAME = "census-pipelines"

# Root directory for the artifacts generated by the pipeline.
_PIPELINE_ROOT = os.path.join("pipelines_local", _PIPELINE_NAME)
# SQLite database file used as the ML Metadata (MLMD) store.
_METADATA_PATH = os.path.join("metadata_local", _PIPELINE_NAME, "metadata.db")
# Directory into which the pipeline exports created models.
_SERVING_MODEL_DIR = os.path.join("serving_model_local", _PIPELINE_NAME)

# Default absl logging level.
# NOTE(review): a later cell calls absl's set_verbosity('ERROR'), which
# replaces this level when the notebook is run top to bottom.
logging.set_verbosity(logging.INFO)

In [15]:
_DATA_DIR = "./data"
_DATA_ROOT = "./data/census"
_data_filepath = os.path.join(_DATA_ROOT, "data.csv")

In [7]:
# Uncomment to create the data folders (names match the path constants).
# !mkdir {_DATA_DIR}
# !mkdir {_DATA_ROOT}

In [8]:
# Preview dataset
!head {_data_filepath}

age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,label
39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,0
50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,0
38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,0
53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,0
28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,0
37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,0
49,Private,160187,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,0
52,Self-emp-not-inc,209642,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-S

In [10]:
import os
import pprint

import tensorflow as tf
import tensorflow_model_analysis as tfma

from tfx import v1 as tfx
from absl import logging as absl_logging
from apache_beam import logging as beam_logging

from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

from trainer import eval_config


# Restrict TensorFlow, absl and Beam logging to the ERROR level.
_tf_logger = tf.get_logger()
_tf_logger.setLevel('ERROR')
_tf_logger.propagate = False  # do not hand TF records to ancestor loggers
absl_logging.set_verbosity('ERROR')
beam_logging.getLogger().setLevel('ERROR')

# Pretty-printer instance for displaying nested structures.
pp = pprint.PrettyPrinter()

INFO:absl:tensorflow_io is not available: No module named 'tensorflow_io'
INFO:absl:tensorflow_ranking is not available: No module named 'tensorflow_ranking'
INFO:absl:tensorflow_text is not available: No module named 'tensorflow_text'
INFO:absl:tensorflow_decision_forests is not available: No module named 'tensorflow_decision_forests'
INFO:absl:struct2tensor is not available: No module named 'struct2tensor'
INFO:absl:tensorflow_text is not available.
INFO:absl:tensorflow_recommenders is not available.


In [13]:
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     transform_module_file: str, trainer_module_file: str,
                     serving_model_dir: str,
                     metadata_path: str,
                     train_steps: int = 50,
                     eval_steps: int = 50) -> tfx.dsl.Pipeline:
  """Assembles the TFX pipeline that takes CSV data through to a pushed model.

  The pipeline chains CsvExampleGen, StatisticsGen, SchemaGen,
  ExampleValidator, Transform, Trainer, a latest-blessed-model Resolver,
  Evaluator and Pusher.

  The Evaluator is configured with the module-level `eval_config` name rather
  than with an argument of this function.

  Args:
    pipeline_name: Name given to the pipeline.
    pipeline_root: Root directory passed to the pipeline as `pipeline_root`.
    data_root: Directory handed to CsvExampleGen as `input_base`.
    transform_module_file: Path to the Transform user module; converted to an
      absolute path before use.
    trainer_module_file: Path to the Trainer user module; converted to an
      absolute path before use.
    serving_model_dir: Base directory of the Pusher's filesystem destination.
    metadata_path: Path of the SQLite file used for the metadata connection.
    train_steps: Number of training steps passed to `TrainArgs`. Defaults to
      50, the value that was previously hard-coded.
    eval_steps: Number of evaluation steps passed to `EvalArgs`. Defaults to
      50, the value that was previously hard-coded.

  Returns:
    A `tfx.dsl.Pipeline` containing all nine components.
  """
  # Brings data into the pipeline.
  example_gen = tfx.components.CsvExampleGen(input_base=data_root)

  # Statistics over the ingested examples; they feed both schema inference
  # and example validation.
  statistics_gen = tfx.components.StatisticsGen(
    examples=example_gen.outputs['examples'])

  schema_gen = tfx.components.SchemaGen(
    statistics=statistics_gen.outputs['statistics'])

  example_validator = tfx.components.ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'])

  # Feature engineering defined by the user-provided transform module.
  transform = tfx.components.Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=os.path.abspath(transform_module_file))

  # Uses user-provided Python function that trains a model.
  trainer = tfx.components.Trainer(
    module_file=os.path.abspath(trainer_module_file),
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=tfx.proto.TrainArgs(num_steps=train_steps),
    eval_args=tfx.proto.EvalArgs(num_steps=eval_steps))

  # Resolver using LatestBlessedModelStrategy; its 'model' output is used as
  # the Evaluator's baseline below.
  model_resolver = tfx.dsl.Resolver(
      strategy_class=tfx.dsl.experimental.LatestBlessedModelStrategy,
      model=tfx.dsl.Channel(type=tfx.types.standard_artifacts.Model),
      model_blessing=tfx.dsl.Channel(
          type=tfx.types.standard_artifacts.ModelBlessing)).with_id(
              'latest_blessed_model_resolver')

  # NOTE(review): `eval_config` is the module-level name imported from the
  # `trainer` package; presumably a tfma.EvalConfig -- confirm there.
  evaluator = tfx.components.Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config)

  # Pushes the model to a filesystem destination; receives the Evaluator's
  # blessing output alongside the trained model.
  pusher = tfx.components.Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=tfx.proto.PushDestination(
        filesystem=tfx.proto.PushDestination.Filesystem(
            base_directory=serving_model_dir)))

  # All nine components that make up the pipeline.
  components = [
      example_gen,
      statistics_gen,
      schema_gen,
      example_validator,
      transform,
      trainer,
      model_resolver,
      evaluator,
      pusher,
  ]

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      metadata_connection_config=tfx.orchestration.metadata
      .sqlite_metadata_connection_config(metadata_path),
      components=components)

In [16]:
_trainer_module_file = './trainer/trainer_file1.py'
_transform_module_file = "./trainer/transform_file.py"

In [17]:
# Assemble the pipeline from the notebook-level settings, then execute it
# with the local DAG runner.
_pipeline = _create_pipeline(
    pipeline_name=_PIPELINE_NAME,
    pipeline_root=_PIPELINE_ROOT,
    data_root=_DATA_ROOT,
    transform_module_file=_transform_module_file,
    trainer_module_file=_trainer_module_file,
    serving_model_dir=_SERVING_MODEL_DIR,
    metadata_path=_METADATA_PATH)
tfx.orchestration.LocalDagRunner().run(_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Generating ephemeral wheel package for '/content/trainer/transform_file.py' (including modules: ['trainer_file3', 'trainer_file1', 'transform_file', 'constants_file', 'trainer_file2']).
INFO:absl:User module package has hash fingerprint version e25210596ad9077d3647f9639f8c4f5b7b9c1dd8b50f12aabf0f73d7c05568ca.
INFO:absl:Executing: ['/usr/bin/python3', '/tmp/tmpwlvam0hr/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmp0j_dg0n7', '--dist-dir', '/tmp/tmpgctfh_mn']
INFO:absl:Successfully built user code wheel distribution at 'pipelines_local/census-pipelines/_wheels/tfx_user_code_Transform-0.0+e25210596ad9077d3647f9639f8c4f5b7b9c1dd8b50f12aabf0f73d7c05568ca-py3-none-any.whl'; target user module is 'transform_file'.
INFO:absl:Full user module path is 'transform_file@pipeline

INFO:absl:Processing input csv data ./data/census/* to TFExample.
INFO:absl:Examples generated.
INFO:absl:Value type <class 'NoneType'> of key version in exec_properties is not supported, going to drop it
INFO:absl:Value type <class 'list'> of key _beam_pipeline_args in exec_properties is not supported, going to drop it
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 1 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "pipelines_local/census-pipelines/CsvExampleGen/examples/1"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:3396202,xor_checksum:1694479204,sum_checksum:1694479204"
  }
}
custom_properties {
  key: "span"
  value {
    int_value: 0
  }
}
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
propertie

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 fnlwgt (InputLayer)         [(None, 1)]                  0         []                            
                                                                                                  
 education-num (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 capital-gain (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 capital-loss (InputLayer)   [(None, 1)]                  0         []                            
                                                                                              

INFO:absl:Feature age has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature capital-gain has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature capital-loss has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature education has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature education-num has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature fnlwgt has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature hours-per-week has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature marital-status has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature native-country has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature occupation has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature race has a shape dim {
  size: 1
}
. Setting 

In [50]:
# List the contents of the serving model directory.
!ls {_SERVING_MODEL_DIR}

1694480529


In [53]:
# List the files of one exported version.
# NOTE(review): '1694480529' is the directory name printed by the previous
# listing and is hard-coded here; update it to inspect a different version.
!ls {_SERVING_MODEL_DIR+'/1694480529'}

assets	fingerprint.pb	keras_metadata.pb  saved_model.pb  variables


In [57]:
# Resolve the newest exported model instead of hard-coding a version folder.
# The folders under _SERVING_MODEL_DIR have numeric names (e.g. 1694480529),
# presumably timestamps, so the largest number is taken as the latest push.
_model_versions = sorted(
    (entry for entry in os.listdir(_SERVING_MODEL_DIR) if entry.isdigit()),
    key=int)
if not _model_versions:
  raise FileNotFoundError(
      f"No exported model versions found under {_SERVING_MODEL_DIR!r}")
# Keep the trailing '/' so the value has the same form as before.
model_path = _SERVING_MODEL_DIR + '/' + _model_versions[-1] + '/'

In [68]:
# Load the exported SavedModel and take its default serving signature.
loaded_model = tf.saved_model.load(model_path)
graph = loaded_model.signatures["serving_default"]
# Displayed as the cell output: the inputs the signature accepts.
graph.structured_input_signature

((), {'examples': TensorSpec(shape=(None,), dtype=tf.string, name='examples')})

In [69]:
# Displayed as the cell output: the outputs the serving signature returns.
graph.structured_outputs

{'outputs': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs')}

In [70]:
# Bundle the metadata, pipeline and serving-model folders into one archive.
!zip -r zipfile.zip metadata_local/ pipelines_local/ serving_model_local/

  adding: metadata_local/ (stored 0%)
  adding: metadata_local/census-pipelines/ (stored 0%)
  adding: metadata_local/census-pipelines/metadata.db (deflated 94%)
  adding: pipelines_local/ (stored 0%)
  adding: pipelines_local/census-pipelines/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/.system/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/.system/executor_execution/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/.system/executor_execution/4/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/schema/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/schema/4/ (stored 0%)
  adding: pipelines_local/census-pipelines/SchemaGen/schema/4/schema.pbtxt (deflated 79%)
  adding: pipelines_local/census-pipelines/CsvExampleGen/ (stored 0%)
  adding: pipelines_local/census-pipelines/CsvExampleGen/.system/ (stored 0%)
  adding: pipelines_local/cen