In [1]:
!pip install tfx

Collecting tfx
  Downloading tfx-1.16.0-py3-none-any.whl.metadata (37 kB)
Collecting ml-pipelines-sdk==1.16.0 (from tfx)
  Downloading ml_pipelines_sdk-1.16.0-py3-none-any.whl.metadata (33 kB)
Collecting ml-metadata<1.17.0,>=1.16.0 (from tfx)
  Downloading ml_metadata-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Collecting google-apitools<1,>=0.5 (from tfx)
  Downloading google_apitools-0.5.32-py3-none-any.whl.metadata (2.3 kB)
Collecting google-api-python-client<2,>=1.8 (from tfx)
  Downloading google_api_python_client-1.12.11-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting apache-beam<3,>=2.47 (from apache-beam[gcp]<3,>=2.47->tfx)
  Downloading apache_beam-2.63.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting attrs<24,>=19.3.0 (from tfx)
  Downloading attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting kubernetes<27,>=10.0.1 (from tfx)
  Downloading kubernetes-26.1.0-py2.py3-none-any.whl.metadata

In [2]:
import pandas as pd
import numpy as np


df = pd.read_csv("/kaggle/input/heart-disease-dataset/heart.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [3]:
_components_script = '''
"""Initiate tfx pipeline components"""

import os

import tensorflow_model_analysis as tfma
from tfx.components import (
    CsvExampleGen,
    Evaluator,
    ExampleValidator,
    Pusher,
    SchemaGen,
    StatisticsGen,
    Trainer,
    Transform,
    Tuner,
)
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import (
    LatestBlessedModelStrategy,
)
from tfx.proto import example_gen_pb2, pusher_pb2, trainer_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing

def init_components(args):  # pylint: disable=too-many-locals
    """Initiate tfx pipeline components

    Args:
        args (dict): args that contain pipeline configuration

    Returns:
        TFX components
    """
    output = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(
            splits=[
                example_gen_pb2.SplitConfig.Split(name="train", hash_buckets=8),
                example_gen_pb2.SplitConfig.Split(name="eval", hash_buckets=2),
            ]
        )
    )

    example_gen = CsvExampleGen(input_base=args["data_dir"], output_config=output)

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])

    schema_gen = SchemaGen(statistics=statistics_gen.outputs["statistics"])

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"],
    )

    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        module_file=os.path.abspath(args["transform_module"]),
    )

    tuner = Tuner(
        module_file=os.path.abspath(args["tuner_module"]),
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(
            splits=["train"],
            num_steps=args["train_steps"],
        ),
        eval_args=trainer_pb2.EvalArgs(
            splits=["eval"],
            num_steps=args["eval_steps"],
        ),
    )

    trainer = Trainer(
        module_file=args["trainer_module"],
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        hyperparameters=tuner.outputs["best_hyperparameters"],
        train_args=trainer_pb2.TrainArgs(
            splits=["train"],
            num_steps=args["train_steps"],
        ),
        eval_args=trainer_pb2.EvalArgs(
            splits=["eval"],
            num_steps=args["eval_steps"],
        ),
    )


    model_resolver = Resolver(
        strategy_class=LatestBlessedModelStrategy,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id("Latest_blessed_model_resolver")

    
    
    # Konfigurasi metrik evaluasi
    metrics_specs = [
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(class_name="ExampleCount"),
                tfma.MetricConfig(class_name="AUC"),
                tfma.MetricConfig(class_name="FalsePositives"),
                tfma.MetricConfig(class_name="TruePositives"),
                tfma.MetricConfig(class_name="FalseNegatives"),
                tfma.MetricConfig(class_name="TrueNegatives"),
                tfma.MetricConfig(
                    class_name="BinaryAccuracy",
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={"value": 0.5}  
                        ),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={"value": 0.0001}, 
                        ),
                    ),
                ),
            ]
        )
    ]

    # Konfigurasi evaluasi model dengan baseline
    eval_config = tfma.EvalConfig(
        model_specs=[
            tfma.ModelSpec(label_key="target", signature_name="serving_default"),
        ],
        slicing_specs=[
            tfma.SlicingSpec(),  # Evaluasi secara keseluruhan
        ],
        metrics_specs=metrics_specs,
    )
    
    # Evaluator
    evaluator = Evaluator(
        examples=example_gen.outputs["examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config,
    )


    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=args["serving_model_dir"]
            )
        ),
    )


    components = (
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        tuner,
        trainer,
        model_resolver,
        evaluator,
        pusher,
    )

    return components

'''

with open("components.py","w") as f:
    f.write(_components_script)

In [4]:
_pipeline_script = '''

"""Pipeline module"""

from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline


def init_pipeline(
    pipeline_root: Text, pipeline_name, metadata_path, components
) -> pipeline.Pipeline:
    """Initiate tfx pipeline

    Args:
        pipeline_root (Text): path to pipeline directory
        pipeline_name (str): pipeline name
        metadata_path (str): path to metadata directory
        components (dict): tfx components

    Returns:
        pipeline.Pipeline: pipeline orchestration
    """
    logging.set_verbosity(logging.INFO)

    beam_args = [
        "--direct_running_mode=multi_processing",
        "----direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path,
        ),
        beam_pipeline_args=beam_args,
    )

'''

with open ("pipeline.py", "w") as f:
    f.write(_pipeline_script)

In [5]:
_transform_script = '''

"""Transform module"""

import tensorflow as tf
import tensorflow_transform as tft

CATEGORICAL_FEATURES = {
    "sex": 2,
    "cp": 4,
    "fbs": 2,
    "restecg": 3,
    "exang": 2,
    "slope": 3,
    "ca": 4,  # Bisa bernilai 0-3, jadi ada 4 kategori
    "thal": 3, # Bisa bernilai 0-2, jadi ada 3 kategori
}

NUMERICAL_FEATURES = [
    "age",
    "trestbps",
    "chol",
    "thalach",
    "oldpeak",
]

LABEL_KEY = "target"


def transformed_name(key):
    """Renaming transformed features"""
    return key + "_xf"


def convert_num_to_one_hot(label_tensor, num_labels=2):
    """
    Convert a label (0 or 1) into a one-hot vector
    Args:
        int: label_tensor (0 or 1)
    Returns
        label tensor
    """
    one_hot_tensor = tf.one_hot(label_tensor, num_labels)
    return tf.reshape(one_hot_tensor, [-1, num_labels])


def preprocessing_fn(inputs):
    """
    Preprocess input features into transformed features

    Args:
        inputs: map from feature keys to raw features.

    Return:
        outputs: map from feature keys to transformed features.
    """

    outputs = {}

    for key in CATEGORICAL_FEATURES:  # pylint: disable=consider-using-dict-items
        dim = CATEGORICAL_FEATURES[key]
        int_value = tft.compute_and_apply_vocabulary(inputs[key], top_k=dim + 1)
        outputs[transformed_name(key)] = convert_num_to_one_hot(
            int_value, num_labels=dim + 1
        )

    for feature in NUMERICAL_FEATURES:
        outputs[transformed_name(feature)] = tft.scale_to_0_1(inputs[feature])

    outputs[transformed_name(LABEL_KEY)] = tf.cast(inputs[LABEL_KEY], tf.int64)

    return outputs

'''

with open("transform.py","w") as f:
    f.write(_transform_script)

In [6]:
_tuner_script = '''
"""Tuner module"""

from typing import Any, Dict, NamedTuple, Text

import keras_tuner as kt
import tensorflow as tf
import tensorflow_transform as tft
from keras import layers
from keras_tuner.engine import base_tuner
import sys
sys.path.append("/kaggle/working/")
import transform,tuner



from transform import (
    CATEGORICAL_FEATURES,
    LABEL_KEY,
    NUMERICAL_FEATURES,
    transformed_name,
)

NUM_EPOCHS = 5

TunerFnResult = NamedTuple(
    "TunerFnResult",
    [
        ("tuner", base_tuner.BaseTuner),
        ("fit_kwargs", Dict[Text, Any]),
    ],
)

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_binary_accuracy",
    mode="max",
    verbose=1,
    patience=10,
    restore_best_weights=True,
)


def gzip_reader_fn(filenames):
    """Loads compression data

    Args:
        filenames (str): a path to the data directory

    Returns:
        TfRecord: Compressed data
    """

    return tf.data.TFRecordDataset(filenames, compression_type="GZIP")


def input_fn(file_pattern, tf_transform_output, batch_size=64):
    """Generated features and labels for tuning/training

    Args:
        file_pattern: input tf_record file pattern
        tf_transform_output: a TFTransformOutput
        batch_size: representing the number of consecutive elements of
        returned dataset to combine in a single batch. Defaults to 64.

    Returns:
        a dataset that contains (featurs, indices) tuple where features
        is a dictionary of Tensors, and indices is a single Tensor of
        label indices
    """

    transform_feature_spec = tf_transform_output.transformed_feature_spec().copy()

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transform_feature_spec,
        reader=gzip_reader_fn,
        label_key=transformed_name(LABEL_KEY),
    )

    return dataset


def get_model_tuner(hp):
    """This function defines a hyperparameters to tune for keras Model

    Args:
        hp (kt.HyperParameters): object to setting hyperparameters

    Returns:
        tf.keras.Model: Keras model object
    """

    n_layers = hp.Int("n_layers", min_value=1, max_value=5, step=1)
    dense_units = hp.Int(
        "dense_units",
        min_value=16,
        max_value=128,
        step=32,
    )
    lr = hp.Choice("lr", values=[1e-2, 1e-3, 1e-4])

    input_features = []

    for key, dim in CATEGORICAL_FEATURES.items():
        input_features.append(
            layers.Input(shape=(dim + 1,), name=transformed_name(key))
        )

    for feature in NUMERICAL_FEATURES:
        input_features.append(layers.Input(shape=(1,), name=transformed_name(feature)))

    concatenate = layers.concatenate(input_features)
    x = layers.Dense(dense_units, activation=tf.nn.relu)(concatenate)

    for _ in range(n_layers):
        x = layers.Dense(dense_units, activation=tf.nn.relu)(x)

    x = layers.Dropout(0.25)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs=input_features, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=["binary_accuracy"],
    )

    model.summary()

    return model


def tuner_fn(fn_args):
    """Tune the model to get the best hyperparameters

    Args:
        fn_args (FnArgs): Holds args used to train the model as name/value pair

    Returns:
        TunerFnResult (NamedTuple): object to run model tuner
    """

    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    train_set = input_fn(fn_args.train_files[0], tf_transform_output)
    eval_set = input_fn(fn_args.eval_files[0], tf_transform_output)

    tuner = kt.Hyperband(
        hypermodel=get_model_tuner,
        objective=kt.Objective("binary_accuracy", direction="max"),
        max_epochs=NUM_EPOCHS,
        factor=3,
        directory=fn_args.working_dir,
        project_name="kt_hyperband",
    )
    # Pastikan max_trials memiliki nilai default jika None
    if tuner.oracle.max_trials is None:
        tuner.oracle.max_trials = 10  # Atur nilai default sesuai kebutuhan


    return TunerFnResult(
        tuner=tuner,
        fit_kwargs={
            "x": train_set,
            "validation_data": eval_set,
            "steps_per_epoch": fn_args.train_steps,
            "validation_steps": fn_args.eval_steps,
            "callbacks": [early_stop],
        },
    )
'''

with open("tuner.py", 'w') as f:
    f.write(_tuner_script)


In [7]:
_trainer_script = '''

"""Training module"""

import os

import tensorflow as tf
import tensorflow_transform as tft
from tensorflow.keras.utils import plot_model
import sys
sys.path.append("/kaggle/working/")
import transform,tuner

from transform import (
    CATEGORICAL_FEATURES,
    LABEL_KEY,
    NUMERICAL_FEATURES,
    transformed_name,
)
from tuner import early_stop, gzip_reader_fn


def get_model(hp):
    """This function defines a keras Model with the best hyperparameters from tuning

    Args:
        hp (kt.HyperParameters): object that contains hyperparameters tuning configuration

    Returns:
        tf.keras.Model: model as a Keras object
    """

    # one-hot categorical features
    input_features = []
    for key, dim in CATEGORICAL_FEATURES.items():
        input_features.append(
            tf.keras.Input(shape=(dim + 1,), name=transformed_name(key))
        )

    for feature in NUMERICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    concatenate = tf.keras.layers.concatenate(input_features)
    x = tf.keras.layers.Dense(hp.get("dense_units"), activation=tf.nn.relu)(concatenate)

    for _ in range(hp.get("n_layers")):
        x = tf.keras.layers.Dense(hp.get("dense_units"), activation="relu")(x)

    x = tf.keras.layers.Dropout(0.25)(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.models.Model(inputs=input_features, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.get("lr")),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.BinaryAccuracy()],
    )

    model.summary()

    return model


def get_serve_tf_examples_fn(model, tf_transform_output):
    """Returns a function that parses a serialized tf.Example."""

    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        """Returns the output to be used in the serving signature."""
        feature_spec = tf_transform_output.raw_feature_spec()
        feature_spec.pop(LABEL_KEY)
        parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)

        transformed_features = model.tft_layer(parsed_features)

        outputs = model(transformed_features)
        return {"outputs": outputs}

    return serve_tf_examples_fn


def input_fn(file_pattern, tf_transform_output, batch_size=64):
    """Generates features and labels for tuning/training.
    Args:
        file_pattern: input tfrecord file pattern.
        tf_transform_output: A TFTransformOutput.
        batch_size: representing the number of consecutive elements of
        returned dataset to combine in a single batch
    Returns:
        A dataset that contains (features, indices) tuple where features
        is a dictionary of Tensors, and indices is a single Tensor of
        label indices.
    """
    transformed_feature_spec = tf_transform_output.transformed_feature_spec().copy()

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transformed_feature_spec,
        reader=gzip_reader_fn,
        label_key=transformed_name(LABEL_KEY),
    )

    return dataset


# TFX Trainer will call this function.
def run_fn(fn_args):
    """Train the model based on given args.
    Args:
    fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
    hp = fn_args.hyperparameters["values"]

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, 64)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, 64)

    model = get_model(hp)

    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), "logs")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq="batch"
    )

    mc = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(fn_args.serving_model_dir, "model.keras"),
        monitor="val_binary_accuracy",
        mode="max",
        verbose=1,
        save_best_only=True,
    )

    # Train the model
    model.fit(
        x=train_dataset,
        validation_data=eval_dataset,
        callbacks=[tensorboard_callback, early_stop, mc],
        steps_per_epoch=fn_args.train_steps,
        validation_steps=fn_args.eval_steps,
        epochs=hp.get("tuner/epochs"),
    )

    signatures = {
        "serving_default": get_serve_tf_examples_fn(
            model, tf_transform_output
        ).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")
        ),
    }
    tf.saved_model.save(model, fn_args.serving_model_dir, signatures=signatures)

    plot_model(
        model, to_file="/kaggle/working/model_plot.png", show_shapes=True, show_layer_names=True
    )
'''

with open("train.py","w") as f:
    f.write(_trainer_script)

In [8]:
import sys
sys.path.append("/kaggle/working/")
import pipeline

import components as components_module

In [9]:
import os
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

In [10]:
PIPELANE_NAME = "jabir_muktabir-pipeline"

# Pipeline inputs
DATA_ROOT = "/kaggle/input/heart-disease-dataset"
TRANSFORM_MODULE_FILE = "/kaggle/working/transform.py"
TUNER_MODULE_FILE = "/kaggle/working/tuner.py"
TRAINER_MODULE_FILE = "/kaggle/working/train.py"

# Pipeline outputs
OUTPUT_BASE = "outputs"

serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")
pipeline_root = os.path.join(OUTPUT_BASE, PIPELANE_NAME)
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

In [11]:
components_args = {
    "data_dir": DATA_ROOT,
    "trainer_module": TRAINER_MODULE_FILE,
    "tuner_module": TUNER_MODULE_FILE,
    "transform_module": TRANSFORM_MODULE_FILE,
    "train_steps": 20,
    "eval_steps": 10,
    "serving_model_dir": serving_model_dir,
}

pipeline_components = components_module.init_components(components_args)

pipeline = pipeline.init_pipeline(
    pipeline_root, PIPELANE_NAME, metadata_path, pipeline_components
)
BeamDagRunner().run(pipeline)

Trial 10 Complete [00h 00m 04s]
binary_accuracy: 0.6499999761581421

Best binary_accuracy So Far: 0.9546874761581421
Total elapsed time: 00h 00m 36s
Results summary
Results in outputs/jabir_muktabir-pipeline/Tuner/.system/executor_execution/7/.temp/7/kt_hyperband
Showing 10 best trials
Objective(name="binary_accuracy", direction="max")

Trial 05 summary
Hyperparameters:
n_layers: 1
dense_units: 112
lr: 0.01
tuner/epochs: 5
tuner/initial_epoch: 2
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 04
Score: 0.9546874761581421

Trial 06 summary
Hyperparameters:
n_layers: 5
dense_units: 112
lr: 0.01
tuner/epochs: 5
tuner/initial_epoch: 2
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 03
Score: 0.9429687261581421

Trial 08 summary
Hyperparameters:
n_layers: 1
dense_units: 16
lr: 0.01
tuner/epochs: 5
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.90234375

Trial 04 summary
Hyperparameters:
n_layers: 1
dense_units: 112
lr: 0.01
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/brack

Epoch 1/5
[1m14/20[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - binary_accuracy: 0.7677 - loss: 0.4981
Epoch 1: val_binary_accuracy improved from -inf to 0.84375, saving model to outputs/jabir_muktabir-pipeline/Trainer/model/8/Format-Serving/model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - binary_accuracy: 0.7886 - loss: 0.4639 - val_binary_accuracy: 0.8438 - val_loss: 0.3420
Epoch 2/5
[1m14/20[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - binary_accuracy: 0.8889 - loss: 0.3399
Epoch 2: val_binary_accuracy did not improve from 0.84375
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.8938 - loss: 0.3196 - val_binary_accuracy: 0.8266 - val_loss: 0.3806
Epoch 3/5
[1m15/20[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 4ms/step - binary_accuracy: 0.9200 - loss: 0.2186
Epoch 3: val_binary_accuracy did not improve from 0.84375
[1m20/20[0m [32m━━━━━━━━━━━━━━━

In [12]:
import shutil
shutil.make_archive('/kaggle/working/pipeline', 'zip', '/kaggle/working/outputs')

'/kaggle/working/pipeline.zip'

In [13]:
import shutil
shutil.make_archive('/kaggle/working/serving_model', 'zip', '/kaggle/working/outputs/serving_model')

'/kaggle/working/serving_model.zip'