In [1]:
import os
import sys
from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from modules.components import init_components

In [2]:
# Pipeline identifier: passed as `pipeline_name` to `pipeline.Pipeline` and
# joined onto OUTPUT_BASE to form the pipeline root directory.
PIPELINE_NAME = "samuel_andrey-pipeline"

In [3]:
# Pipeline inputs, all handed to init_components in the run cell.
# The paths are relative — presumably to the notebook's working directory;
# confirm before running from elsewhere.

# Passed as the first positional argument of init_components.
DATA_ROOT = "data"
# User module files passed as `transform_module` and `training_module`.
TRANSFORM_MODULE_FILE = "modules/heart_disease_transform.py"
TRAINER_MODULE_FILE = "modules/heart_disease_trainer.py"
# NOTE(review): disabled leftover. `root` is not defined in the visible cells,
# so un-commenting this as-is would raise NameError; remove or restore deliberately.
# requirement_file = os.path.join(root, "requirements.txt")

In [4]:
# Pipeline outputs: every generated path lives under OUTPUT_BASE.
OUTPUT_BASE = "output"

# Root directory for this pipeline's artifacts, and the SQLite file inside it
# that backs the ML Metadata connection config.
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

# Directory passed to init_components as `serving_model_dir`.
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")

In [5]:
def init_local_pipeline(
        components, pipeline_root: Text
) -> pipeline.Pipeline:
    """Assemble the TFX pipeline for a local Apache Beam run.

    Args:
        components: The TFX components to place in the pipeline (in this
            notebook, the value returned by ``init_components``).
        pipeline_root: Directory given to the pipeline as its root; it is
            also written to the log.

    Returns:
        A ``pipeline.Pipeline`` named ``PIPELINE_NAME`` with caching enabled,
        an SQLite metadata connection at the module-level ``metadata_path``,
        and the Beam arguments defined below.
    """
    logging.info(f"Pipeline root set to: {pipeline_root}")
    # Each Beam option must be its own list element. Without the separating
    # comma Python concatenates adjacent string literals into a single,
    # meaningless argument.
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 = auto-detect based on the number of CPUs available
        # during execution time.
        "--direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        # The keyword must be spelled `beam_pipeline_args`; a misspelled
        # keyword never reaches the Beam runner.
        beam_pipeline_args=beam_args,
    )

In [6]:
# Surface INFO-level progress messages from TFX while the pipeline runs.
logging.set_verbosity(logging.INFO)

components = init_components(
    DATA_ROOT,
    training_module=TRAINER_MODULE_FILE,
    transform_module=TRANSFORM_MODULE_FILE,
    training_steps=5000,
    eval_steps=1000,
    serving_model_dir=serving_model_dir,
)

# Do not bind the result to `pipeline`: that name is the imported
# tfx.orchestration.pipeline module, which init_local_pipeline looks up at
# call time. Shadowing it makes this cell (and the function's cell) fail on
# any re-run within the same kernel.
local_pipeline = init_local_pipeline(components, pipeline_root)
BeamDagRunner().run(pipeline=local_pipeline)


INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: output\samuel_andrey-pipeline
INFO:absl:Generating ephemeral wheel package for 'C:\\Users\\samuel\\Documents\\06 - Course\\Machine Learning Operations (MLOps) dengan Cloudeka - Dicoding\\02 - Submission\\model-deployment-heart-disease-prediction\\modules\\heart_disease_transform.py' (including modules: ['components', 'heart_disease_trainer', 'heart_disease_transform']).
INFO:absl:User module package has hash fingerprint version 74f90c495f76eb85fbbb39865324c99adef966c1567f756ca1341b8337b9cec0.
INFO:absl:Executing: ['C:\\Users\\samuel\\miniconda3\\envs\\a443-churn-2\\python.exe', 'C:\\Users\\samuel\\AppData\\Local\\Temp\\tmpdtx5qocr\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\samuel\\AppData\\Local\\Temp\\tmp6xnvecjt', '--dist-dir', 'C:\\Us

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl

Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.
INFO:absl:Feature Age has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature ChestPainType has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Cholesterol has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature ExerciseAngina has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature FastingBS has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature HeartDisease has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature MaxHR has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Oldpeak has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature RestingBP has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature RestingECG has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature ST_Slope has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sex has a 

INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\15fdee24aaed4c1da8079ef5654d1f04\assets


INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\15fdee24aaed4c1da8079ef5654d1f04\assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or

INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\199d204e0b2f4c7aa42514125f5a3ecb\assets


INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\199d204e0b2f4c7aa42514125f5a3ecb\assets
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 5 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'post_transform_stats': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Transform\\post_transform_stats\\5"
, artifact_type: name: "ExampleStatistics"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
base_type: STATISTICS
)], 'updated_analyzer_cache': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Transform\\updated_analyzer_cache\\5"
, artifact_type: name: "TransformCache"
)], 'pre_transform_stats': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Transform\\pre_transform_stats\\5"
, artifact_type: name: "ExampleStatistics"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
base_type: STATISTICS
)], 'pre_transform_schema': [Arti

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Sex_xf (InputLayer)            [(None, 3)]          0           []                               
                                                                                                  
 ChestPainType_xf (InputLayer)  [(None, 5)]          0           []                               
                                                                                                  
 FastingBS_xf (InputLayer)      [(None, 3)]          0           []                               
                                                                                                  
 RestingECG_xf (InputLayer)     [(None, 4)]          0           []                               
                                                                                              

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Trainer\model\7\Format-Serving\assets


INFO:tensorflow:Assets written to: output\samuel_andrey-pipeline\Trainer\model\7\Format-Serving\assets


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


INFO:absl:Training complete. Model written to output\samuel_andrey-pipeline\Trainer\model\7\Format-Serving. ModelRun written to output\samuel_andrey-pipeline\Trainer\model_run\7
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 7 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model_run': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Trainer\\model_run\\7"
, artifact_type: name: "ModelRun"
)], 'model': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Trainer\\model\\7"
, artifact_type: name: "Model"
base_type: MODEL
)]}) for execution 7
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Trainer is finished.
INFO:absl:node Evaluator is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.evaluator.component.Evaluator"
    base_type: EVALUATE
  }
  id: "Evaluator"
}
contexts {
  contexts {
    type {
      name: "pipelin



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'example_splits': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "Precision"\n        },\n        {\n          "class_name": "Recall"\n        },\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "HeartDisease"\n    }\n  ],\n  "slicing_specs": [\n    {},\n    {\n      "feature_keys": [\n        "Sex"\n      ]\n    }\n  ]\n}', 'fairness_indicator_thr



























INFO:absl:Evaluation complete. Results written to output\samuel_andrey-pipeline\Evaluator\evaluation\8.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result True written to output\samuel_andrey-pipeline\Evaluator\blessing\8.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 8 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'blessing': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Evaluator\\blessing\\8"
, artifact_type: name: "ModelBlessing"
)], 'evaluation': [Artifact(artifact: uri: "output\\samuel_andrey-pipeline\\Evaluator\\evaluation\\8"
, artifact_type: name: "ModelEvaluation"
)]}) for execution 8
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Evaluator is finished.
INFO:absl:node Pusher is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
    base_type: DEPLOY
  }
  id: "Pusher"
}
contexts {
  contexts {
    type {
      name: "pipeline