In [None]:
# TensorFlow Extended와 TensorFlow가 깔려 있다면 이 셀은 스킵.
!pip install tfx
!pip install tensorflow

In [1]:
import os
import pathlib
import shutil

import tensorflow as tf
import tensorflow_model_analysis as tfma
import tfx
from tfx.components import (ImportExampleGen,
                            StatisticsGen, SchemaGen, ExampleValidator,
                            Transform, Trainer, Evaluator, Pusher)
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing
from tfx.v1.dsl import Resolver
from tfx.v1.dsl.experimental import LatestBlessedModelStrategy

2023-01-14 03:55:56.211802: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Directory of the exported SavedModel. Its last path component ("1") is reused
# below as the model version when building the evaluation output path.
model_dir = "models/1"
model_ver = os.path.basename(model_dir)
# NOTE(review): `trained_model` is rebound to the Trainer output channel later in
# this notebook; this loaded object is not referenced by the cells in between.
trained_model = tf.saved_model.load(model_dir)

2023-01-13 09:36:41.018043: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[TFMA에 아직도 `EvalSavedModel`이 꼭 필요한가?](https://www.tensorflow.org/tfx/model_analysis/faq#is_an_evalsavedmodel_still_required)에 대한 답변.

In [3]:
# TFMA evaluates a SavedModel through an `EvalSharedModel` wrapper, so the model
# directory is loaded into that object form here, using the serving tag.
eval_shared_model = tfma.default_eval_shared_model(
    tags=[tf.saved_model.SERVING],
    eval_saved_model_path=model_dir,
)

In [8]:
# Evaluation results are written to a directory named after the model version.
eval_result_location = os.path.join("metadata", "eval_results", model_ver)
# Evaluation input: only 100 examples, because the VM instance is CPU-only and
# inference would otherwise take too long.
eval_data_file = os.path.join(
    "data", "complaints", "splits", "small-from-eval.tfrecord",
)

In [2]:
eval_config = tfma.EvalConfig(
    # Model spec: names the ground-truth label feature.
    # Setting `model_type=tfma.TF_LITE` on the `ModelSpec` would allow a TFLite
    # model to be analyzed as well. `signature_name` and
    # `preprocessing_function_names` are not set here.
    model_specs=[
        tfma.ModelSpec(label_key="consumer_disputed"),
    ],
    # Data slices: an empty spec evaluates over the whole dataset. Slicing on
    # specific features is also possible.
    slicing_specs=[tfma.SlicingSpec()],
    # Metrics to compute for the `EvalSharedModel`, listed by class name.
    metrics_specs=[
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(class_name=metric_class)
                for metric_class in (
                    "BinaryAccuracy",
                    "ExampleCount",
                    "FalsePositives",
                    "TruePositives",
                    "FalseNegatives",
                    "TrueNegatives",
                )
            ],
        ),
    ],
)


In [5]:
# Run the model analysis for model 1 and persist the results under `output_path`.
eval_result = tfma.run_model_analysis(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model,
    output_path=eval_result_location,
    data_location=eval_data_file,
    # Format of the evaluation file given in `data_location`.
    file_format="tfrecords",
)





Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


In [6]:
# Display the EvalResult: per-slice metric values plus the config and paths used.
eval_result

EvalResult(slicing_metrics=[((), {'': {'': {'example_count': {'doubleValue': 100.0}, 'binary_accuracy': {'doubleValue': 0.7}, 'false_positives': {'doubleValue': 0.0}, 'true_positives': {'doubleValue': 0.0}, 'false_negatives': {'doubleValue': 30.0}, 'true_negatives': {'doubleValue': 70.0}}}})], plots=[((), None)], attributions=[((), None)], config=model_specs {
  label_key: "consumer_disputed"
}
slicing_specs {
}
metrics_specs {
  metrics {
    class_name: "BinaryAccuracy"
  }
  metrics {
    class_name: "ExampleCount"
  }
  metrics {
    class_name: "FalsePositives"
  }
  metrics {
    class_name: "TruePositives"
  }
  metrics {
    class_name: "FalseNegatives"
  }
  metrics {
    class_name: "TrueNegatives"
  }
  model_names: ""
}
, data_location='data/complaints/splits/small-from-eval.tfrecord', file_format='tfrecords', model_location='models/1')

In [7]:
# Render the interactive slicing-metrics viewer for the result computed above.
tfma.view.render_slicing_metrics(eval_result)

SlicingMetricsViewer(config={'weightedExamplesColumn': 'example_count'}, data=[{'slice': 'Overall', 'metrics':…

# 복수 모델 비교
그래프 구조는 동일. 스텝만 조정.

|        | train_steps | eval_steps |
|--------|-------------|------------|
| model1 | 100         | 50         |
| model2 | 500         | 100        |


In [9]:
# Second model version: wrapped for TFMA the same way as model 1, with its own
# results directory named after its version ("2").
model_dir2 = "models/2"
model_ver2 = os.path.basename(model_dir2)

eval_shared_model2 = tfma.default_eval_shared_model(
    tags=[tf.saved_model.SERVING],
    eval_saved_model_path=model_dir2,
)
eval_result_location2 = os.path.join("metadata", "eval_results", model_ver2)

In [10]:
# Analyze model 2 with the same config and evaluation data as model 1, writing
# to its own output directory so both runs can be loaded and compared later.
eval_result2 = tfma.run_model_analysis(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model2,
    output_path=eval_result_location2,
    data_location=eval_data_file,
    # Format of the evaluation file given in `data_location`.
    file_format="tfrecords",
)





Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


In [11]:
# Load both persisted evaluation runs back from disk so they can be compared.
eval_result_locations = [
    eval_result_location,
    eval_result_location2,
]
eval_results_from_disk = tfma.load_eval_results(eval_result_locations)

In [12]:
# Render the time-series viewer over the evaluation results loaded from disk.
tfma.view.render_time_series(eval_results_from_disk)

TimeSeriesViewer(config={'isModelCentric': True}, data=[{'metrics': {'': {'': {'example_count': {'doubleValue'…

# TFX 컴포넌트를 이용한 분석

blessing: 검사할 모델이 기준으로 삼은 모델(baseline이라고 부른다)보다 더 낫고, 설정한 평가 기준을
넘는다면 그 모델에게 합격을 주는 일을 말한다.


사용되는 컴포넌트:
  - `Resolver`: `Evaluator`에 넘겨줄 baseline 모델(가장 최근에 blessed된 모델)을 찾아 주는 역할을 담당.
    - 단 이 컴포넌트는 다른 컴포넌트와 달리 `tfx.v1.dsl`에서 찾아야 한다. `v1`은 레거시가 아니라
        TFX 1.0부터 제공되는 안정화된 공개 API 네임스페이스다.
  - `Evaluator`: 모델들의 지표를 분석 비교해 blessing 여부를 결정한다.
  - `Pusher`: blessed 상태 모델을 내보내는 역할을 한다.

In [2]:
def copy_artifact(component, root_dir):
    """Copy the first artifact directory of every output of ``component``.

    For each ``(key, output)`` pair in ``component.outputs``, the directory at
    the ``uri`` of the first artifact returned by ``output.get()`` is copied to
    ``root_dir/key``.

    Args:
        component: Object exposing ``outputs``, a mapping from output key to an
            object whose ``get()`` returns a list of artifacts that each have
            a ``uri`` attribute pointing at a directory.
        root_dir: Destination root, as ``str`` or ``pathlib.Path``. Missing
            parent directories are created by ``shutil.copytree``.

    Notes:
        * Outputs without any artifact are skipped instead of raising
          ``IndexError``.
        * An existing ``root_dir/key`` directory is replaced, so re-running the
          cell does not fail with ``FileExistsError`` and the copy always
          mirrors the current artifact.
    """
    root = pathlib.Path(root_dir)
    for key, output in component.outputs.items():
        artifacts = output.get()
        if not artifacts:
            # Nothing was produced for this output; there is nothing to copy.
            continue
        dest = root / key
        if dest.exists():
            # Drop the stale copy first: copytree refuses an existing target.
            shutil.rmtree(dest)
        shutil.copytree(artifacts[0].uri, dest)

In [3]:
# Interactive TFX context; every `context.run(...)` call below executes one
# component inside this notebook session.
context = InteractiveContext()



In [4]:
# Ingest the records found under `record_dir` with ImportExampleGen and keep
# the resulting "examples" output channel for the downstream components.
record_dir = "data/complaints/records"
example_gen = ImportExampleGen(input_base=record_dir)
context.run(example_gen)
examples = example_gen.outputs["examples"]



In [5]:
# Root directory (relative to the working directory) that receives artifact copies.
artifact_dir = pathlib.Path("artifacts")

In [7]:
# Copy the ExampleGen output artifacts under `artifact_dir`, one subdirectory per output key.
copy_artifact(example_gen, artifact_dir)

In [4]:
# Run StatisticsGen on the ingested examples; its "statistics" output channel
# feeds SchemaGen and ExampleValidator below.
statistics_gen = StatisticsGen(examples=examples)
context.run(statistics_gen)
stats = statistics_gen.outputs["statistics"]

In [5]:
# Run SchemaGen on the statistics (with feature-shape inference enabled) and
# keep its "schema" output channel for validation, transform and training.
schema_gen = SchemaGen(statistics=stats, infer_feature_shape=True)
context.run(schema_gen)
schema = schema_gen.outputs["schema"]

In [6]:
# Run ExampleValidator with the statistics and the schema; its result is
# exposed through the "anomalies" output channel.
example_validator = ExampleValidator(statistics=stats, schema=schema)
context.run(example_validator)
anomalies = example_validator.outputs["anomalies"]

In [7]:
# Run the Transform component with the user code in `transform_module.py`.
# Both of its outputs are kept: the transform graph and the transformed
# examples, which are the inputs of the Trainer below.
transform = Transform(examples=examples, schema=schema, module_file="transform_module.py")
context.run(transform)
transform_graph = transform.outputs["transform_graph"]
transformed_examples = transform.outputs["transformed_examples"]

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying transform_module.py -> build/lib
copying trainer_module.py -> build/lib
copying hello.py -> build/lib
installing to /tmp/tmpwnyukrst
running install
running install_lib
copying build/lib/transform_module.py -> /tmp/tmpwnyukrst
copying build/lib/trainer_module.py -> /tmp/tmpwnyukrst
copying build/lib/hello.py -> /tmp/tmpwnyukrst
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
Copying tfx_user_code_Transform.egg-info to /tmp/tmpwnyukrst/tfx_user_code_Trans



Processing /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/_wheels/tfx_user_code_Transform-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f
Processing /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/_wheels/tfx_user_code_Transform-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f
Processing /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/_wheels/tfx_user_code_Transform-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transfor

2023-01-13 16:17:54.192466: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Transform/transform_graph/5/.temp_path/tftransform_tmp/c1af09b0ae4147c58bd8616c6203559e/assets


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Transform/transform_graph/5/.temp_path/tftransform_tmp/c1af09b0ae4147c58bd8616c6203559e/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Transform/transform_graph/5/.temp_path/tftransform_tmp/7c969daf519f42aba0266952fbed05ec/assets


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Transform/transform_graph/5/.temp_path/tftransform_tmp/7c969daf519f42aba0266952fbed05ec/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


In [8]:
# Train the model with the user code in `trainer_module.py`, using the
# transformed examples, the transform graph and the schema from the cells above.
# `trainer_pb2` is imported explicitly (like `pusher_pb2`) instead of being
# reached through `tfx.proto.trainer_pb2`, which only resolves when some other
# import has already loaded that submodule as a side effect.
trainer = Trainer(
    examples=transformed_examples,
    transform_graph=transform_graph,
    schema=schema,
    module_file="trainer_module.py",
    # Step counts are kept small; NOTE(review): presumably to keep the run
    # short on this machine - confirm the intended values.
    train_args=trainer_pb2.TrainArgs(num_steps=50),
    eval_args=trainer_pb2.EvalArgs(num_steps=10))
context.run(trainer)
# From here on `trained_model` is the Trainer's "model" output channel.
trained_model = trainer.outputs["model"]

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying transform_module.py -> build/lib
copying trainer_module.py -> build/lib
copying hello.py -> build/lib
installing to /tmp/tmpofzlq_n1
running install
running install_lib
copying build/lib/transform_module.py -> /tmp/tmpofzlq_n1
copying build/lib/trainer_module.py -> /tmp/tmpofzlq_n1
copying build/lib/hello.py -> /tmp/tmpofzlq_n1
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /tmp/tmpofzlq_n1/tfx_user_code_Trainer-0.0+3100132ed



Processing /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/_wheels/tfx_user_code_Trainer-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f-py3-none-any.whl
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+3100132ed8ea60cb8773f45a92a4da856ccdce929d65b49a4bf28e3401c1242f




Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Trainer/model/6/Format-Serving/assets


INFO:tensorflow:Assets written to: /tmp/tfx-interactive-2023-01-13T16_17_31.357854-nhcsbetp/Trainer/model/6/Format-Serving/assets


In [9]:
# Resolver configured with LatestBlessedModelStrategy over Model and
# ModelBlessing channels. Its "model" output is passed to the Evaluator as the
# baseline model further down.
model_resolver = Resolver(
    model_blessing=Channel(type=ModelBlessing),
    model=Channel(type=Model),
    strategy_class=LatestBlessedModelStrategy,
).with_id("latest_blessed_model_resolver")
context.run(model_resolver)

0,1
.execution_id,7
.component,<tfx.dsl.components.common.resolver.Resolver object at 0x7f7e575956a0>
.component.inputs,['model']<tfx.types.resolved_channel.ResolvedChannel object at 0x7f7e5759bdf0>['model_blessing']<tfx.types.resolved_channel.ResolvedChannel object at 0x7f7e57649b80>
.component.outputs,['model'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (0 artifacts) at 0x7f7e573502e0.type_nameModel._artifacts[]['model_blessing'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ModelBlessing' (0 artifacts) at 0x7f7e57350a30.type_nameModelBlessing._artifacts[]

0,1
['model'],<tfx.types.resolved_channel.ResolvedChannel object at 0x7f7e5759bdf0>
['model_blessing'],<tfx.types.resolved_channel.ResolvedChannel object at 0x7f7e57649b80>

0,1
['model'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Model' (0 artifacts) at 0x7f7e573502e0.type_nameModel._artifacts[]
['model_blessing'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ModelBlessing' (0 artifacts) at 0x7f7e57350a30.type_nameModelBlessing._artifacts[]

0,1
.type_name,Model
._artifacts,[]

0,1
.type_name,ModelBlessing
._artifacts,[]


In [10]:
# Evaluation config for the Evaluator component.
# The blessing threshold is attached directly to the AUC `MetricConfig` rather
# than to a `thresholds` map keyed by "AUC": keys of that map are matched
# against emitted metric names, and the names TFMA emitted earlier in this
# notebook are lower snake_case (e.g. "binary_accuracy", "example_count"), so
# an "AUC" key would most likely never match the computed metric.
eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key="consumer_disputed")],
    # Evaluate on the whole dataset and additionally per value of "product".
    slicing_specs=[
        tfma.SlicingSpec(),
        tfma.SlicingSpec(feature_keys=["product"]),
    ],
    metrics_specs=[
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(class_name="BinaryAccuracy"),
                tfma.MetricConfig(class_name="ExampleCount"),
                tfma.MetricConfig(
                    class_name="AUC",
                    # Even when the candidate beats the baseline, it must also
                    # clear this threshold to be blessed.
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={"value": 0.65}),
                        # The metric delta between the two models must exceed
                        # 0.01, and a higher value is better for the new model.
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={"value": 0.01}))),
            ])])

In [11]:
# Decides whether the incoming model is better than the baseline model.
# Evaluation is slow on a CPU-only machine, so `examples` was generated to hold
# just 100 examples.
evaluator = Evaluator(
    eval_config=eval_config,
    examples=examples,
    baseline_model=model_resolver.outputs["model"],
    model=trained_model,
)

In [None]:
# Execute the Evaluator; its "evaluation" and "blessing" outputs are read in the cells below.
context.run(evaluator)





In [27]:
# Same relative location ("artifacts") as `artifact_dir` defined earlier in this notebook.
root_dir = pathlib.Path("artifacts")

artifacts/examples


In [None]:
# Blessing output channel of the Evaluator; handed to the Pusher below.
blessing = evaluator.outputs["blessing"]

# NOTE: despite its name, `eval_result` holds the URI (a path) of the first
# "evaluation" artifact here; the loaded result object is `tfma_result`.
eval_result = evaluator.outputs["evaluation"].get()[0].uri
tfma_result = tfma.load_eval_result(eval_result)

In [None]:
# URI of the first "evaluation" artifact written by the Evaluator.
output_path = evaluator.outputs["evaluation"].get()[0].uri

# Read the evaluation results from that location ...
eval_result = tfma.load_eval_result(output_path)

# ... and the validation results stored alongside them.
validation_result = tfma.load_validation_result(output_path)

### Pusher

In [None]:
# Filesystem directory that the Pusher is configured to push into.
_serving_model_dir = "serving_model_dir"

# The Pusher receives the trained model together with the Evaluator's blessing
# output and is given a filesystem push destination.
pusher = Pusher(
    model=trained_model,
    model_blessing=blessing,
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=_serving_model_dir,
        ),
    ),
)
context.run(pusher)