# IMPORTING LIBRARIES

In [5]:
import os
from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# SET VARIABLES

In [6]:
# Name given to the pipeline; also used as the pipeline's output sub-directory.
PIPELINE_NAME = "diabetes-prediction-pipeline"

# --- Pipeline inputs --------------------------------------------------------
# Input data location and the user-module files handed to the components.
DATA_ROOT = "data"
TRANSFORM_MODULE_FILE = "modules/diabetes_prediction_transform.py"
TRAINER_MODULE_FILE = "modules/diabetes_prediction_trainer.py"
# Disabled for now:
# requirement_file = os.path.join(root, "requirements.txt")

# --- Pipeline outputs -------------------------------------------------------
# Every output path is derived from OUTPUT_BASE.
OUTPUT_BASE = "output"
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")
# SQLite metadata database stored inside the pipeline root.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

# RUN PIPELINE ORCHESTRATION

In [7]:
def init_local_pipeline(
    components, pipeline_root: Text
) -> pipeline.Pipeline:
    """Build the TFX pipeline object used for a local Apache Beam run.

    Args:
        components: Pipeline components, forwarded unchanged to
            ``pipeline.Pipeline``.
        pipeline_root: Root directory handed to the pipeline; it is also
            written to the log.

    Returns:
        A ``pipeline.Pipeline`` named ``PIPELINE_NAME`` with caching enabled,
        an SQLite metadata connection at the module-level ``metadata_path``,
        and the Beam arguments defined below.
    """
    logging.info(f"Pipeline root set to: {pipeline_root}")

    # Each flag must be its own list element. Without the separating comma,
    # Python's implicit string concatenation fuses both flags into a single
    # unusable argument.
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 auto-detects based on the number of CPUs available
        # during execution time.
        "--direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        # Keyword was misspelled as `eam_pipeline_args`, so the Beam arguments
        # never reached the pipeline under the intended parameter.
        beam_pipeline_args=beam_args,
    )

In [8]:
if __name__ == "__main__":
    # Entry point: build the components, assemble the pipeline, and run it
    # with the Beam DAG runner.
    logging.set_verbosity(logging.INFO)

    from modules.components import init_components

    components = init_components(
        DATA_ROOT,
        training_module=TRAINER_MODULE_FILE,
        transform_module=TRANSFORM_MODULE_FILE,
        training_steps=5000,
        eval_steps=1000,
        serving_model_dir=serving_model_dir,
    )

    # Do not bind the result to `pipeline`: that would overwrite the imported
    # `tfx.orchestration.pipeline` module at global scope, and any later call
    # to init_local_pipeline (e.g. re-running this cell) would fail when it
    # looks up `pipeline.Pipeline`.
    local_pipeline = init_local_pipeline(components, pipeline_root)
    BeamDagRunner().run(pipeline=local_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: output\diabetes-prediction-pipeline
INFO:absl:Generating ephemeral wheel package for 'c:\\Users\\ROG G531\\diabetes prediction pipeline\\modules\\diabetes_prediction_transform.py' (including modules: ['components', 'diabetes_prediction_trainer', 'diabetes_prediction_transform']).
INFO:absl:User module package has hash fingerprint version 73b3cb8ae947384d99bdb8ed6713cdc5b5d98fc9fe6ae7c23cba4a78696c8b0b.
INFO:absl:Executing: ['c:\\Users\\ROG G531\\anaconda3\\envs\\a443-diabetes\\python.exe', 'C:\\Users\\ROGG53~1\\AppData\\Local\\Temp\\tmpcxku5vgn\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\ROGG53~1\\AppData\\Local\\Temp\\tmppk_qo48b', '--dist-dir', 'C:\\Users\\ROGG53~1\\AppData\\Local\\Temp\\tmp6kr_9itl']


INFO:absl:Successfully built user code wheel distribution at 'output\\diabetes-prediction-pipeline\\_wheels\\tfx_user_code_Transform-0.0+73b3cb8ae947384d99bdb8ed6713cdc5b5d98fc9fe6ae7c23cba4a78696c8b0b-py3-none-any.whl'; target user module is 'diabetes_prediction_transform'.
INFO:absl:Full user module path is 'diabetes_prediction_transform@output\\diabetes-prediction-pipeline\\_wheels\\tfx_user_code_Transform-0.0+73b3cb8ae947384d99bdb8ed6713cdc5b5d98fc9fe6ae7c23cba4a78696c8b0b-py3-none-any.whl'
INFO:absl:Generating ephemeral wheel package for 'c:\\Users\\ROG G531\\diabetes prediction pipeline\\modules\\diabetes_prediction_trainer.py' (including modules: ['components', 'diabetes_prediction_trainer', 'diabetes_prediction_transform']).
INFO:absl:User module package has hash fingerprint version 73b3cb8ae947384d99bdb8ed6713cdc5b5d98fc9fe6ae7c23cba4a78696c8b0b.
INFO:absl:Executing: ['c:\\Users\\ROG G531\\anaconda3\\envs\\a443-diabetes\\python.exe', 'C:\\Users\\ROGG53~1\\AppData\\Local\\Temp\

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl