In [None]:
# This pipeline demonstrates how to use a pipeline to create visualizations for training results.
# The visualizations include a confusion matrix, a ROC curve, and a table.
# 
# reference: https://github.com/kubeflow/pipelines/tree/master/samples/core/visualization

In [1]:
with open("requirements.txt", "w") as f:
    f.write("kfp==1.8.9\n")
    
!pip install -r requirements.txt  --upgrade --user

Collecting kfp==1.8.9
  Using cached kfp-1.8.9-py3-none-any.whl
Installing collected packages: kfp
  Attempting uninstall: kfp
    Found existing installation: kfp 1.8.13
    Uninstalling kfp-1.8.13:
      Successfully uninstalled kfp-1.8.13
Successfully installed kfp-1.8.9


In [2]:
import kfp.dsl as dsl
from kfp.components import create_component_from_func

from typing import NamedTuple
@create_component_from_func
def confusion_visualization(matrix_uri: str = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/samples/core/visualization/confusion_matrix.csv') -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Describe a confusion-matrix CSV file as UI metadata for visualization.

    Args:
        matrix_uri: Location of the confusion-matrix CSV. It is passed through
            unchanged as the ``source`` of the metadata entry.

    Returns:
        A ``VisualizationOutput`` named tuple whose single field,
        ``mlpipeline_ui_metadata``, holds the metadata serialized as JSON.
    """
    # Imports stay inside the function body, as in the original component.
    # NOTE(review): presumably required because only this function's source is
    # packaged into the component -- confirm against the kfp documentation.
    import json
    from collections import namedtuple

    # Columns declared in the schema, as (name, type) pairs.
    columns = [('target', 'CATEGORY'), ('predicted', 'CATEGORY'), ('count', 'NUMBER')]
    confusion_matrix_view = {
        'type': 'confusion_matrix',
        'format': 'csv',
        'schema': [{'name': name, 'type': kind} for name, kind in columns],
        'source': matrix_uri,
        'labels': ['rose', 'lily', 'iris'],
    }
    ui_metadata = json.dumps({'outputs': [confusion_matrix_view]})

    VisualizationOutput = namedtuple('VisualizationOutput', ['mlpipeline_ui_metadata'])
    return VisualizationOutput(ui_metadata)

@create_component_from_func
def roc_visualization(roc_csv_uri: str='https://raw.githubusercontent.com/kubeflow/pipelines/master/samples/core/visualization/roc.csv') -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Describe a ROC-curve CSV file as UI metadata for visualization.

    Args:
        roc_csv_uri: Location of the ROC CSV. It is passed through unchanged
            as the ``source`` of the metadata entry.

    Returns:
        A ``VisualizationOutput`` named tuple whose single field,
        ``mlpipeline_ui_metadata``, holds the metadata serialized as JSON.
    """
    # Imports stay inside the function body, as in the original component.
    import json
    from collections import namedtuple

    # Every column declared in the schema is numeric.
    numeric_columns = ('fpr', 'tpr', 'thresholds')
    roc_view = {
        'type': 'roc',
        'format': 'csv',
        'schema': [{'name': column, 'type': 'NUMBER'} for column in numeric_columns],
        'source': roc_csv_uri,
    }
    ui_metadata = json.dumps({'outputs': [roc_view]})

    VisualizationOutput = namedtuple('VisualizationOutput', ['mlpipeline_ui_metadata'])
    return VisualizationOutput(ui_metadata)

@create_component_from_func
def table_visualization(train_file_path: str = 'https://raw.githubusercontent.com/zijianjoy/pipelines/5651f41071816594b2ed27c88367f5efb4c60b50/samples/core/visualization/table.csv') -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Describe a CSV file as UI metadata for a table visualization.

    Args:
        train_file_path: Location of the CSV holding the table rows. It is
            passed through unchanged as the ``source`` of the metadata entry.

    Returns:
        A ``VisualizationOutput`` named tuple whose single field,
        ``mlpipeline_ui_metadata``, holds the metadata serialized as JSON.
    """
    # Imports stay inside the function body, as in the original component.
    import json
    from collections import namedtuple

    table_view = {
        'type': 'table',
        'storage': 'gcs',
        'format': 'csv',
        # Column titles for the table; strings are reproduced exactly as in the original.
        'header': ['Average precision ', 'Precision', 'Recall'],
        'source': train_file_path,
    }
    ui_metadata = json.dumps({'outputs': [table_view]})

    VisualizationOutput = namedtuple('VisualizationOutput', ['mlpipeline_ui_metadata'])
    return VisualizationOutput(ui_metadata)


In [3]:
import kfp
import kfp.dsl as dsl
import kfp.components as components

@dsl.pipeline(
   name='visualization pipeline',
   description='A pipeline to demonstrate visualization from kubeflow.'
)
def visualization_pipeline():
    """Create one task per visualization component, each with its default input."""
    # The task objects are not consumed by anything downstream, so the
    # components are simply invoked in order without binding the results.
    confusion_visualization()
    roc_visualization()
    table_visualization()

In [5]:
# Compile the pipeline function into a package file in the working directory.
pipeline_compiler = kfp.compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=visualization_pipeline,
    package_path='helloworld.zip',
)