In [1]:
!python -m pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/ac/cf/0cc542fc93de2f3b9b53cb979c7d1118cffb93204afb46299a9f858e113f/pip-21.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.6MB 5.8MB/s 
[?25hInstalling collected packages: pip
Successfully installed pip-21.1


In [2]:
!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 5.7 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 17.1 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 18.7 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.8 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.5.0.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 7.0 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3-

In [9]:
import kfp
from kfp import dsl
import kfp.components as comp

In [10]:
def load_data_op():
    return dsl.ContainerOp(
        name = 'Load Data',
        image = 'mavencodevv/load_bird:v.0.1',
        arguments = [],
        file_outputs={
            'img_folder': '/load_data/img_folder'
        }  
    )

In [11]:
def km_op(img_folder):
    return dsl.ContainerOp(
        name = 'Keras Model',
        image = 'mavencodevv/km_bird:v.0.1',
        arguments = ['--img_folder', img_folder],
        file_outputs={
            'km': '/keras_model/km.h5',
            'model_json': '/keras_model/model_json',
            'keras_metrics': '/keras_model/keras_metrics'
        }
    )

In [16]:
def pyt_op(img_folder):
    return dsl.ContainerOp(
        name = 'Pytorch Model',
        image = 'mavencodevv/pyt_bird:v.0.1',
        arguments = ['--img_folder', img_folder],
        file_outputs={
            'pytorch_model': '/pyt_model/pytorch_model.pt',
            'pytorch_metrics': '/pyt_model/pytorch_metrics'
        }  
    )

In [17]:
def eval_op(img_folder, keras_metrics, model_json, km_model, pytorch_metrics, pytorch_model):
    return dsl.ContainerOp(
        name = 'Evaluating Models',
        image = 'mavencodevv/eval_bird:v.0.1',
        arguments = ['--img_folder', img_folder, 
                     '--keras_metrics', keras_metrics,
                     '--model_json', model_json,
                     '--km_model', km_model,
                     '--pytorch_metrics', pytorch_metrics,
                     '--pytorch_model', pytorch_model
                     ]
 
    )

In [18]:
@dsl.pipeline(
    name='Bird Image Classification',
   description='An ML pipeline which trains pytorch and keras models in image classification of birds'
)

# Define parameters to be fed into pipeline
def image_pipeline():

  volume_op = dsl.VolumeOp(
    name="data_volume",
    resource_name="data-volume",
    size="1Gi",
    modes=dsl.VOLUME_MODE_RWO)
  
  _load_data_op = load_data_op().add_pvolumes({'/mnt': volume_op.volume})

  _km_op = km_op(dsl.InputArgumentPath(_load_data_op.outputs['img_folder'])).add_pvolumes({'/mnt': _load_data_op.pvolume})

  _pyt_op = pyt_op(dsl.InputArgumentPath(_load_data_op.outputs['img_folder'])).add_pvolumes({'/mnt': _load_data_op.pvolume})

  _eval_op = eval_op(dsl.InputArgumentPath(_load_data_op.outputs['img_folder']),
                     dsl.InputArgumentPath(_km_op.outputs['keras_metrics']),
                     dsl.InputArgumentPath(_km_op.outputs['model_json']),
                     dsl.InputArgumentPath(_km_op.outputs['km']),
                     dsl.InputArgumentPath(_pyt_op.outputs['pytorch_metrics']),
                     dsl.InputArgumentPath(_pyt_op.outputs['pytorch_model'])).add_pvolumes({'/mnt': _load_data_op.pvolume})


In [19]:
# Compile pipeline to generate compressed YAML definition of the pipeline.
experiment_name = 'birdproject'

kfp.compiler.Compiler().compile(image_pipeline, '{}.yaml'.format(experiment_name))

