In [None]:
# Upgrade pip itself (user-level install) before installing the notebook's dependencies.
!python -m pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 7.7MB/s 
[?25hInstalling collected packages: pip
Successfully installed pip-21.0.1


In [None]:
!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 8.4 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 11.0 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 16.2 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.3 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.4.1.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 5.0 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3-

In [None]:
import kfp
from kfp import dsl
import kfp.components as comp

In [None]:
def load_data_op():
    """Build the pipeline step that loads the data.

    Returns:
        dsl.ContainerOp: a step named 'Load Data' that runs the
        'mavencodevv/load_cm:v.0.2' image with no arguments and maps the
        output name 'data' to the container file '/load_data/data'.
    """
    declared_outputs = {'data': '/load_data/data'}
    step = dsl.ContainerOp(
        name='Load Data',
        image='mavencodevv/load_cm:v.0.2',
        arguments=[],
        file_outputs=declared_outputs,
    )
    return step

In [None]:
def preprocess_op(data):
    """Build the pipeline step that preprocesses the loaded data.

    Args:
        data: value forwarded to the container after the ``--data`` flag.

    Returns:
        dsl.ContainerOp: a step named 'Preprocess Data' that runs the
        'mavencodevv/preprocess_cm:v.0.1' image and maps the output name
        'clean_data' to the container file '/preprocess/clean_data'.
    """
    container_args = ['--data', data]
    declared_outputs = {'clean_data': '/preprocess/clean_data'}
    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodevv/preprocess_cm:v.0.1',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def autoencoder_op(clean_data):
    """Build the pipeline step that trains the autoencoder model.

    Args:
        clean_data: value forwarded to the container after the
            ``--clean_data`` flag.

    Returns:
        dsl.ContainerOp: a step named 'Autoencoder model' that runs the
        'mavencodevv/train_cm:v.0.3' image and maps the output name
        'fit_model' to the container file '/train_keras/fit_model'.
    """
    container_args = ['--clean_data', clean_data]
    declared_outputs = {'fit_model': '/train_keras/fit_model'}
    return dsl.ContainerOp(
        name='Autoencoder model',
        image='mavencodevv/train_cm:v.0.3',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def pca_op(clean_data):
    """Build the pipeline step that runs the PCA model.

    Args:
        clean_data: value forwarded to the container after the
            ``--clean_data`` flag.

    Returns:
        dsl.ContainerOp: a step named 'PCA model' that runs the
        'mavencodevv/pca_cm:v.0.1' image and maps the output name
        'pca_metrics' to the container file '/pca/pca_metrics'.
    """
    container_args = ['--clean_data', clean_data]
    declared_outputs = {'pca_metrics': '/pca/pca_metrics'}
    return dsl.ContainerOp(
        name='PCA model',
        image='mavencodevv/pca_cm:v.0.1',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def test_op(clean_data, fit_model):
    return dsl.ContainerOp(
        name = 'Test autoencoder',
        image = 'mavencodevv/test_cm:v.0.4',
        arguments = ['--clean_data', clean_data,
                    '--fit_model', fit_model
            ],
        file_outputs={
            'metrics': '/test/metrics' 
        }
    )

In [None]:
def results(metrics, pca_metrics):
    """Build the final pipeline step that receives both sets of metrics.

    Args:
        metrics: value forwarded to the container after the ``--metrics``
            flag.
        pca_metrics: value forwarded to the container after the
            ``--pca_metrics`` flag.

    Returns:
        dsl.ContainerOp: a step named 'Results' that runs the
        'mavencodevv/results_cm:v.0.1' image. No file outputs are declared
        for this step.
    """
    container_args = [
        '--metrics', metrics,
        '--pca_metrics', pca_metrics,
    ]
    return dsl.ContainerOp(
        name='Results',
        image='mavencodevv/results_cm:v.0.1',
        arguments=container_args,
    )

In [None]:
@dsl.pipeline(
    name='Condition monitoring pipeline',
    description='An ML reusable pipeline that monitors the wear and tear of a Factory machine'
)
def cm_pipeline():
    """Wire the condition-monitoring steps into a single pipeline.

    The pipeline takes no parameters. Steps are created in this order:
    load data -> preprocess -> autoencoder training and PCA (both fed by the
    preprocessed data) -> autoencoder test -> results (fed by the test
    metrics and the PCA metrics).

    Every upstream output is wrapped in ``dsl.InputArgumentPath`` before it
    is handed to the next step, and each step is additionally ordered after
    its producer(s) with ``.after(...)``.
    """

    def output_path(step, output_name):
        # A fresh InputArgumentPath is built for every consumer of an output.
        return dsl.InputArgumentPath(step.outputs[output_name])

    load_step = load_data_op()

    preprocess_step = preprocess_op(output_path(load_step, 'data'))
    preprocess_step = preprocess_step.after(load_step)

    # Both models consume the same preprocessed data.
    autoencoder_step = autoencoder_op(output_path(preprocess_step, 'clean_data'))
    autoencoder_step = autoencoder_step.after(preprocess_step)

    pca_step = pca_op(output_path(preprocess_step, 'clean_data'))
    pca_step = pca_step.after(preprocess_step)

    # The test step needs the preprocessed data and the fitted model.
    test_step = test_op(
        output_path(preprocess_step, 'clean_data'),
        output_path(autoencoder_step, 'fit_model'),
    )
    test_step = test_step.after(autoencoder_step)

    # The results step joins the two branches.
    results_step = results(
        output_path(test_step, 'metrics'),
        output_path(pca_step, 'pca_metrics'),
    )
    results_step = results_step.after(test_step, pca_step)
    

In [None]:
# Compile the pipeline function into a YAML definition file named after
# `experiment_name` (written to the current working directory).
experiment_name = 'cm_pipeline'

kfp.compiler.Compiler().compile(
    pipeline_func=cm_pipeline,
    package_path=experiment_name + '.yaml',
)



In [None]:
# Connect to the Kubeflow Pipelines API with the client's default settings and
# submit the pipeline function as a run with no pipeline arguments.
# The run is filed under `experiment_name` (defined in the compile cell) so it
# is grouped with this pipeline's other runs; without it the run lands in the
# default experiment and the name defined above is never used.
client = kfp.Client()
client.create_run_from_pipeline_func(
    cm_pipeline,
    arguments={},
    experiment_name=experiment_name,
)