## 1. Directly convert function to component

In [20]:
!python3 -m pip install kfp --upgrade

Defaulting to user installation because normal site-packages is not writeable
Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.5)


In [1]:
import kfp
import kfp.gcp
import kfp.dsl as dsl
import kfp.compiler
import kfp.components

In [5]:
# Create the Kubeflow Pipelines API client; the run-submission cells below reuse it.
# No arguments are passed, so the SDK's default connection settings apply.
client = kfp.Client()

### Define a function

In [9]:
# A plain Python function that is later wrapped as a pipeline component
def add(a: float, b: float) -> float:
    """Return the sum of the two given numbers.

    Args:
        a: First addend.
        b: Second addend.

    Returns:
        The value of ``a + b``.
    """
    total = a + b
    return total

### Convert the function to a component

In [10]:
# Wrap `add` as a container-based pipeline op that runs on the given base image.
add_op = kfp.components.func_to_container_op(
    func=add,
    base_image="tensorflow/tensorflow:1.15.0-py3",
)

### Construct the pipeline

In [11]:
@dsl.pipeline(
    name='Calculation pipeline',
    description='A toy pipeline that performs arithmetic calculations.'
)
def calc_pipeline(
    # The default must be convertible to the float that `add` expects;
    # a non-numeric default would make a run with no arguments fail.
    a='1',
):
    """Single-step pipeline that adds the constant 4 to the pipeline parameter ``a``.

    Args:
        a: First operand, forwarded to ``add_op``. Given as a numeric string,
            like the values in the ``arguments`` dict used at submission time.
    """
    # Pass the pipeline parameter and a constant value as the op's arguments.
    # The call returns a dsl.ContainerOp instance representing this step.
    add_task = add_op(a, 4)

### Submit and run the Pipeline

In [12]:
# Pick the pipeline to launch and the experiment that will hold the run.
pipeline_func = calc_pipeline
experiment_name = 'python-functions'

# Values for the pipeline's parameters
arguments = {'a': '6'}

# Name the run after the pipeline function
run_name = '{} run'.format(pipeline_func.__name__)

# Submit the pipeline straight from the Python pipeline function
run_result = client.create_run_from_pipeline_func(
    pipeline_func,
    experiment_name=experiment_name,
    run_name=run_name,
    arguments=arguments,
)

## 2. A more complicated function

In [15]:
# Advanced component function:
# shows in-function imports, a nested helper and several named outputs.
from typing import NamedTuple

def my_divmod(dividend: float,
              divisor: float,
             ) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float),
                                                ('mlpipeline_ui_metadata', 'UI_metadata'),
                                                ('mlpipeline_metrics', 'Metrics')]):
    """Divide two numbers and return the quotient and remainder with sample UI artifacts.

    Args:
        dividend: Number to be divided.
        divisor: Number to divide by.

    Returns:
        A ``MyDivmodOutput`` namedtuple with four fields:
        ``quotient`` and ``remainder`` as computed by ``numpy.divmod``;
        ``mlpipeline_ui_metadata``, a JSON string describing one sample
        tensorboard output; and ``mlpipeline_metrics``, a JSON string holding
        the quotient and remainder as two metrics.
    """
    # Everything the body needs is imported inside the function, so the
    # component function is self-contained.
    import json
    from collections import namedtuple
    import numpy as np

    # Nested helper: shows that a component function may define inner functions.
    def _np_divmod(numerator, denominator):
        return np.divmod(numerator, denominator)

    quot, rem = _np_divmod(dividend, divisor)

    # Sample UI metadata: a single tensorboard viewer entry.
    tensorboard_entry = {
        'type': 'tensorboard',
        'source': 'gs://ml-pipeline-dataset/tensorboard-train',
    }
    ui_metadata_json = json.dumps({'outputs': [tensorboard_entry]})

    # Two sample metrics; values are converted to plain Python floats.
    metric_entries = [
        {'name': label, 'numberValue': float(value)}
        for label, value in (('quotient', quot), ('remainder', rem))
    ]
    metrics_json = json.dumps({'metrics': metric_entries})

    # Runtime result type; field names mirror the return annotation.
    MyDivmodOutput = namedtuple(
        'MyDivmodOutput',
        ['quotient', 'remainder', 'mlpipeline_ui_metadata', 'mlpipeline_metrics'],
    )
    return MyDivmodOutput(quot, rem, ui_metadata_json, metrics_json)

In [17]:
# Turn my_divmod into a pipeline op.
# The base image must provide numpy, which my_divmod imports at run time.
divmod_op = kfp.components.func_to_container_op(
    func=my_divmod,
    base_image="tensorflow/tensorflow:1.15.0-py3",
)

### Build the pipeline

In [18]:
import kfp.dsl as dsl
@dsl.pipeline(
    name='Calculation pipeline',
    description='A toy pipeline that performs arithmetic calculations.'
)
def add_div_pipeline(
    # Defaults are numeric strings so they can be converted to the floats the
    # component functions expect; a non-numeric default would make a run with
    # no arguments fail.
    a='1',
    b='7',
    c='17',
):
    """Three-step pipeline: add 4 to ``a``, divmod the sum by ``b``, add ``c`` to the quotient.

    Args:
        a: Operand added to the constant 4 in the first step.
        b: Divisor for the divmod step.
        c: Operand added to the quotient in the last step.
    """
    # Pass a pipeline parameter and a constant value as op arguments.
    # The call returns a dsl.ContainerOp instance representing this step.
    add_task = add_op(a, 4)

    # Pass a task output reference as an op argument.
    # For an op with a single return value, the output reference is available
    # as `task.output` or `task.outputs['output_name']`.
    divmod_task = divmod_op(add_task.output, b)

    # For an op with multiple return values, each output reference is
    # accessed with `task.outputs['output_name']`.
    result_task = add_op(divmod_task.outputs['quotient'], c)

### Submit and run the Pipeline

In [20]:
# Pick the pipeline to launch; the run goes into the same experiment as the earlier run.
pipeline_func = add_div_pipeline
experiment_name = 'python-functions'

# Values for the pipeline's three parameters
arguments = {'a': '6', 'b': '2', 'c': '5'}

# Name the run after the pipeline function
run_name = '{} run'.format(pipeline_func.__name__)

# Submit the pipeline straight from the Python pipeline function
run_result = client.create_run_from_pipeline_func(
    pipeline_func,
    experiment_name=experiment_name,
    run_name=run_name,
    arguments=arguments,
)