<a href="https://colab.research.google.com/github/NetskyA/Machine_Learning_Pipeline/blob/main/Pengembangan_Machine_Learning_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install tfx
!pip install matplotlib-venn

In [None]:
import os
import tensorflow as tf
import tfx
from tfx.components import CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator, Transform, Trainer, Evaluator, Pusher
from tfx.dsl.components.common.resolver import Resolver
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.orchestration.pipeline import Pipeline
from tfx.orchestration.local.local_dag_runner import LocalDagRunner
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing
from google.protobuf import text_format
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow_model_analysis.proto import config_pb2

In [None]:
# Load dataset
DATA_PATH = './sample_data/spam.csv'

# Convert CSV to TFRecord format.
# CsvExampleGen expects `input_base` to be a directory and globs the split
# pattern inside it, so a file path passed as `input_base` matches nothing.
# Pass the containing directory and restrict the single split to the file name.
example_gen = CsvExampleGen(
    input_base=os.path.dirname(DATA_PATH),
    input_config=example_gen_pb2.Input(splits=[
        example_gen_pb2.Input.Split(
            name='single_split',
            pattern=os.path.basename(DATA_PATH),
        ),
    ]),
)


In [None]:
# Name of the pipeline; also used as the leaf directory of the pipeline root.
pipeline_name = 'AldiAfendiyanto_pipeline'

# All pipeline output is rooted at pipeline_output/<pipeline_name>/, and the
# SQLite metadata file sits directly inside that root.
pipeline_root = os.path.join('pipeline_output', pipeline_name)
metadata_path = os.path.join(pipeline_root, 'metadata.sqlite')

# User-code modules for Transform and Trainer, resolved to absolute paths
# under <cwd>/sample_data/.
transform_module_file, trainer_module_file = (
    os.path.join(os.getcwd(), 'sample_data', module_filename)
    for module_filename in ('transform_module.py', 'trainer_module.py')
)

# Instantiate the components

# Ingestion: CsvExampleGen expects `input_base` to be a directory and globs the
# split pattern inside it, so the CSV file itself cannot be used as
# `input_base`. Reuse DATA_PATH (defined in the dataset cell) instead of
# repeating the literal path, and limit the single split to that one file.
example_gen = CsvExampleGen(
    input_base=os.path.dirname(DATA_PATH),
    input_config=example_gen_pb2.Input(splits=[
        example_gen_pb2.Input.Split(
            name='single_split',
            pattern=os.path.basename(DATA_PATH),
        ),
    ]),
)

# Validation chain: statistics over the ingested examples, a schema inferred
# from those statistics, and a validator that receives both.
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'],
    infer_feature_shape=True,
)
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'],
)

# Feature engineering: applies the user code in `transform_module_file` to the
# ingested examples, together with the schema produced by SchemaGen.
transform = Transform(
    module_file=transform_module_file,
    schema=schema_gen.outputs['schema'],
    examples=example_gen.outputs['examples'],
)

# Model training: runs the user code in `trainer_module_file` on the
# transformed examples for 1000 training steps and 500 evaluation steps.
# NOTE(review): the transform graph is handed over twice, once as the regular
# `transform_graph` input and once inside `custom_config`. The second looks
# redundant; confirm whether trainer_module.py reads
# custom_config['transform_output'] before removing it.
trainer = Trainer(
    module_file=trainer_module_file,
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    custom_config={'transform_output': transform.outputs['transform_graph']},
    train_args=trainer_pb2.TrainArgs(num_steps=1000),
    eval_args=trainer_pb2.EvalArgs(num_steps=500),
)

# Resolver configured with the LatestBlessedModelResolver strategy over the
# Model and ModelBlessing artifact types.
# NOTE(review): no component in this cell consumes `resolver.outputs` (the
# Evaluator is built without a `baseline_model`); confirm this is intended.
resolver = Resolver(
    model_blessing=Channel(type=ModelBlessing),
    model=Channel(type=Model),
    strategy_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
)

# Lower bound of 0.7 on the `binary_accuracy` metric
# (presumably what decides the Evaluator's blessing; confirm against TFMA docs).
accuracy_threshold = config_pb2.MetricThreshold(
    value_threshold=config_pb2.GenericValueThreshold(lower_bound={'value': 0.7})
)

# Evaluation setup: labels are read from the `label` key and the only metric
# computed is BinaryAccuracy, checked against the threshold above.
eval_config = config_pb2.EvalConfig(
    model_specs=[config_pb2.ModelSpec(label_key='label')],
    metrics_specs=[
        config_pb2.MetricsSpec(
            metrics=[config_pb2.MetricConfig(class_name='BinaryAccuracy')],
            thresholds={'binary_accuracy': accuracy_threshold},
        )
    ],
)

# Model evaluation: scores the trained model on the examples produced by
# ExampleGen (not the transformed ones), according to `eval_config`.
evaluator = Evaluator(
    eval_config=eval_config,
    model=trainer.outputs['model'],
    examples=example_gen.outputs['examples'],
)

# Destination for pushed models: <pipeline_root>/model_serving on the local
# filesystem.
serving_model_dir = os.path.join(pipeline_root, 'model_serving')

# Model deployment: receives the trained model and the Evaluator's blessing
# output, and targets the filesystem destination above.
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir
        )
    ),
)

# Define the pipeline: every component built above, with caching enabled and
# a SQLite metadata connection at `metadata_path`.
pipeline_components = [
    example_gen,
    statistics_gen,
    schema_gen,
    example_validator,
    transform,
    trainer,
    resolver,
    evaluator,
    pusher,
]

pipeline = Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=pipeline_components,
    enable_cache=True,
    metadata_connection_config=sqlite_metadata_connection_config(metadata_path),
)

# Create an InteractiveContext with explicit metadata connection config.
# It is kept (bound to the same root and metadata file as the pipeline) so that
# any cell relying on `context` continues to work.
context = InteractiveContext(
    pipeline_root=pipeline_root,
    metadata_connection_config=sqlite_metadata_connection_config(metadata_path),
)

# Run the pipeline.
# InteractiveContext.run() is meant for a single component; handing it the
# whole Pipeline object produced an execution with no inputs and no outputs,
# i.e. none of the components were actually executed. LocalDagRunner is the
# local orchestrator for Pipeline objects and runs every component in
# dependency order.
LocalDagRunner().run(pipeline)

0,1
.execution_id,21
.component,<tfx.orchestration.pipeline.Pipeline object at 0x788a03a8e7a0>
.component.inputs,{}
.component.outputs,{}
