In [1]:
import os
import sys

from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

In [2]:
# Name of the pipeline; also used as the sub-directory of OUTPUT_BASE
# that becomes the pipeline root.
PIPELINE_NAME = "derozari-pipeline"

# --- Pipeline inputs ---------------------------------------------------
DATA_ROOT = "data"
TRANSFORM_MODULE_FILE = "modules/credit_risk_transform.py"
TRAINER_MODULE_FILE = "modules/credit_risk_trainer.py"

# --- Pipeline outputs --------------------------------------------------
OUTPUT_BASE = "output"
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
# SQLite file, located inside the pipeline root, used for the metadata
# connection config.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

In [3]:

def init_local_pipeline(
    components, pipeline_root: Text
) -> pipeline.Pipeline:
    """Build the TFX pipeline object for a local Apache Beam run.

    Args:
        components: The TFX components to register on the pipeline; passed
            through unchanged as ``components``.
        pipeline_root: Path passed through as the pipeline's
            ``pipeline_root``.

    Returns:
        A ``pipeline.Pipeline`` named ``PIPELINE_NAME`` with caching enabled,
        a SQLite metadata connection config built from the module-level
        ``metadata_path``, and the Beam arguments defined below.
    """
    logging.info(f"Pipeline root set to: {pipeline_root}")

    # Each flag must be its own list element. Without the separating comma
    # Python concatenates adjacent string literals into one invalid flag.
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 = auto-detect based on the number of CPUs available
        # during execution
        "--direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        # Keyword was previously misspelled ("eam_pipeline_args"), so these
        # arguments were not being passed as beam_pipeline_args.
        beam_pipeline_args=beam_args,
    )

In [4]:

if __name__ == "__main__":
    logging.set_verbosity(logging.INFO)

    # Project-local factory that builds the component list for this pipeline.
    from modules.components import init_components

    components = init_components(
        DATA_ROOT,
        transform_module=TRANSFORM_MODULE_FILE,
        training_module=TRAINER_MODULE_FILE,
        training_steps=5000,
        eval_steps=1000,
        serving_model_dir=serving_model_dir,
    )

    # Do not name this variable `pipeline`: that would rebind the imported
    # `tfx.orchestration.pipeline` module in the kernel namespace, and any
    # later call to init_local_pipeline (e.g. re-running this cell) would
    # fail when it looks up `pipeline.Pipeline`.
    local_pipeline = init_local_pipeline(components, pipeline_root)
    BeamDagRunner().run(pipeline=local_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: output/derozari-pipeline
INFO:absl:Generating ephemeral wheel package for '/home/jorge/Learn/MLOps/submission2/modules/credit_risk_transform.py' (including modules: ['credit_risk_transform', 'components', 'credit_risk_trainer']).
INFO:absl:User module package has hash fingerprint version 9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507.
INFO:absl:Executing: ['/home/jorge/anaconda3/envs/a443-churn/bin/python', '/tmp/tmpacjp0cti/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmppj_9q5ut', '--dist-dir', '/tmp/tmpxciyzdbv']
`setuptools.config.parse_configuration` became deprecated.

For the time being, you can use the `setuptools.config.setupcfg` module
to access a backward compatible API, but this module is provisional
and might be rem

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying credit_risk_transform.py -> build/lib
copying components.py -> build/lib
copying credit_risk_trainer.py -> build/lib
running install
running install_lib
copying build/lib/credit_risk_transform.py -> /tmp/tmppj_9q5ut
copying build/lib/components.py -> /tmp/tmppj_9q5ut
copying build/lib/credit_risk_trainer.py -> /tmp/tmppj_9q5ut
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
Copying tfx_user_code_Transform.egg-info to /tmp/tmppj_9q5ut/tfx_user_code_Transf

`setuptools.config.parse_configuration` became deprecated.

For the time being, you can use the `setuptools.config.setupcfg` module
to access a backward compatible API, but this module is provisional
and might be removed in the future.

  parse_configuration(
INFO:absl:Successfully built user code wheel distribution at 'output/derozari-pipeline/_wheels/tfx_user_code_Trainer-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'; target user module is 'credit_risk_trainer'.
INFO:absl:Full user module path is 'credit_risk_trainer@output/derozari-pipeline/_wheels/tfx_user_code_Trainer-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'
INFO:absl:Using deployment config:
 executor_specs {
  key: "CsvExampleGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.example_gen.csv_example_gen.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "Evaluator"
  value {
 

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying credit_risk_transform.py -> build/lib
copying components.py -> build/lib
copying credit_risk_trainer.py -> build/lib
running install
running install_lib
copying build/lib/credit_risk_transform.py -> /tmp/tmpzspen42n
copying build/lib/components.py -> /tmp/tmpzspen42n
copying build/lib/credit_risk_trainer.py -> /tmp/tmpzspen42n
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /tmp/tmpzspen42n/tfx_user_code_Trainer-0.0+9685ff9411



INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl

Processing ./output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507


INFO:absl:Successfully installed 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'.
INFO:absl:udf_utils.get_fn {'module_file': None, 'module_path': 'credit_risk_transform@output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl', 'stats_options_updater_fn': None} 'stats_options_updater_fn'
INFO:absl:Installing 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/home/jorge/anaconda3/envs/a443-churn/bin/python', '-m', 'pip', 'install', '--target', '/tmp/tmp3wge4cuv', 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl']


Processing ./output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507


INFO:absl:Successfully installed 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'.
INFO:absl:Installing 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/home/jorge/anaconda3/envs/a443-churn/bin/python', '-m', 'pip', 'install', '--target', '/tmp/tmp3ts2pmxw', 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl']


Processing ./output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507


INFO:absl:Successfully installed 'output/derozari-pipeline/_wheels/tfx_user_code_Transform-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'.
INFO:absl:Feature loan_grade has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_intent has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_home_ownership has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature cb_person_cred_hist_length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature cb_person_default_on_file has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_amnt has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_int_rate has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature loan_percent_income has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_status has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Fe

Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.
INFO:absl:Feature loan_grade has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_intent has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_home_ownership has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature cb_person_cred_hist_length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature cb_person_default_on_file has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_amnt has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_int_rate has no shape. Setting to VarLenSparseTensor.
INFO:absl:Feature loan_percent_income has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_status has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_age has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_emp_length has no shape. Setting to

INFO:tensorflow:Assets written to: output/derozari-pipeline/Transform/transform_graph/5/.temp_path/tftransform_tmp/f7056d76b11f4c1b8a92a21baaf6dfe2/assets


INFO:tensorflow:Assets written to: output/derozari-pipeline/Transform/transform_graph/5/.temp_path/tftransform_tmp/f7056d76b11f4c1b8a92a21baaf6dfe2/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.


INFO:tensorflow:Assets written to: output/derozari-pipeline/Transform/transform_graph/5/.temp_path/tftransform_tmp/001be6f5cbf0465db2adc86db0f6d2e0/assets


INFO:tensorflow:Assets written to: output/derozari-pipeline/Transform/transform_graph/5/.temp_path/tftransform_tmp/001be6f5cbf0465db2adc86db0f6d2e0/assets
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 5 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'post_transform_stats': [Artifact(artifact: uri: "output/derozari-pipeline/Transform/post_transform_stats/5"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Transform:post_transform_stats:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.7.0"
  }
}
, artifact_type: name: "ExampleStatistics"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
base_type: STATISTICS
)], 'pre_transform_schema': [Artifact(artifact: uri: "output/derozari-pipeline/Transform/pre_transform_schema/5"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Transform:pre_transform_schema:0"
  }
}
cus

Processing ./output/derozari-pipeline/_wheels/tfx_user_code_Trainer-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507


INFO:absl:Successfully installed 'output/derozari-pipeline/_wheels/tfx_user_code_Trainer-0.0+9685ff9411bdb3299b1af50bc1b6674f7295c8280011687e73a3112cc521a507-py3-none-any.whl'.
INFO:absl:Training model.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 person_home_ownership_xf (Inpu  [(None, 6)]         0           []                               
 tLayer)                                                                                          
                                                                                                  
 loan_intent_xf (InputLayer)    [(None, 8)]          0           []                               
                                                                                                  
 loan_grade_xf (InputLayer)     [(None, 9)]          0           []                               
                                                                                                  
 person_age_xf (InputLayer)     [(None, 1)]          0           []                           

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output/derozari-pipeline/Trainer/model/6/Format-Serving/assets


INFO:tensorflow:Assets written to: output/derozari-pipeline/Trainer/model/6/Format-Serving/assets
INFO:absl:Training complete. Model written to output/derozari-pipeline/Trainer/model/6/Format-Serving. ModelRun written to output/derozari-pipeline/Trainer/model_run/6
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 6 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model_run': [Artifact(artifact: uri: "output/derozari-pipeline/Trainer/model_run/6"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Trainer:model_run:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.7.0"
  }
}
, artifact_type: name: "ModelRun"
)], 'model': [Artifact(artifact: uri: "output/derozari-pipeline/Trainer/model/6"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Trainer:model:0"
  }




INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "Precision"\n        },\n        {\n          "class_name": "Recall"\n        },\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "cb_person_default_on_file"\n    }\n  ],\n  "slicing_specs": [\n    {},\n    {\n      "feature_keys": [\n        "person_home_ownership",\n        "loan_intent"\n      ]\n    }\n  ]



























  recall = tp / (tp + fn)
INFO:absl:Evaluation complete. Results written to output/derozari-pipeline/Evaluator/evaluation/7.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result False written to output/derozari-pipeline/Evaluator/blessing/7.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 7 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'blessing': [Artifact(artifact: uri: "output/derozari-pipeline/Evaluator/blessing/7"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Evaluator:blessing:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.7.0"
  }
}
, artifact_type: name: "ModelBlessing"
)], 'evaluation': [Artifact(artifact: uri: "output/derozari-pipeline/Evaluator/evaluation/7"
custom_properties {
  key: "name"
  value {
    string_value: "derozari-pipeline:20221117-160717.855111:Evaluator:evaluation:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_va