In [None]:
# Upgrade pip itself, installed per-user (--user), using the `python -m pip` form.
!python -m pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |▏                               | 10kB 18.4MB/s eta 0:00:01[K     |▍                               | 20kB 25.6MB/s eta 0:00:01[K     |▋                               | 30kB 19.9MB/s eta 0:00:01[K     |▉                               | 40kB 18.1MB/s eta 0:00:01[K     |█                               | 51kB 16.0MB/s eta 0:00:01[K     |█▎                              | 61kB 17.8MB/s eta 0:00:01[K     |█▌                              | 71kB 13.0MB/s eta 0:00:01[K     |█▊                              | 81kB 12.1MB/s eta 0:00:01[K     |██                              | 92kB 11.7MB/s eta 0:00:01[K     |██▏                             | 102kB 11.5MB/s eta 0:00:01[K     |██▍                             | 112kB 11.5MB/s eta 0:00:01[K     |██▌                             | 122kB 11.5MB/s et

In [None]:
!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 11.8 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 22.3 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 33.2 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 1.5 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.4.1.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 3.9 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3

In [None]:
import kfp
from kfp import dsl
import kfp.components as comp

In [None]:
def load_data_op():
    """Build the pipeline step that runs the data-loading container.

    Returns:
        A dsl.ContainerOp named 'Load Data' that runs image
        'mavencodevv/load_los:v.0.1' with no arguments and declares the
        file '/load_data/data' as its 'data' output.
    """
    declared_outputs = {'data': '/load_data/data'}
    return dsl.ContainerOp(
        name='Load Data',
        image='mavencodevv/load_los:v.0.1',
        arguments=[],
        file_outputs=declared_outputs,
    )

In [None]:
def preprocess_op(data):
    """Build the pipeline step that runs the preprocessing container.

    Args:
        data: Value passed to the container right after the '--data' flag.

    Returns:
        A dsl.ContainerOp named 'Preprocess Data' that runs image
        'mavencodevv/preprocess_los:v.0.1' and declares the file
        '/preprocess/clean_data' as its 'clean_data' output.
    """
    container_args = ['--data', data]
    declared_outputs = {'clean_data': '/preprocess/clean_data'}
    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodevv/preprocess_los:v.0.1',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def train_op(clean_data):
    """Build the pipeline step that runs the training container.

    Args:
        clean_data: Value passed to the container right after the
            '--clean_data' flag.

    Returns:
        A dsl.ContainerOp named 'Train model' that runs image
        'mavencodevv/train_los:v.0.1' and declares the file
        '/train_model/fit_model' as its 'fit_model' output.
    """
    container_args = ['--clean_data', clean_data]
    declared_outputs = {'fit_model': '/train_model/fit_model'}
    return dsl.ContainerOp(
        name='Train model',
        image='mavencodevv/train_los:v.0.1',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def test_op(clean_data, fit_model):
    """Build the pipeline step that runs the model-testing container.

    Args:
        clean_data: Value passed to the container right after the
            '--clean_data' flag.
        fit_model: Value passed to the container right after the
            '--fit_model' flag.

    Returns:
        A dsl.ContainerOp named 'Test model' that runs image
        'mavencodevv/test_los:v.0.1' and declares the file
        '/test_model/metrics' as its 'metrics' output.
    """
    container_args = [
        '--clean_data', clean_data,
        '--fit_model', fit_model,
    ]
    declared_outputs = {'metrics': '/test_model/metrics'}
    return dsl.ContainerOp(
        name='Test model',
        image='mavencodevv/test_los:v.0.1',
        arguments=container_args,
        file_outputs=declared_outputs,
    )

In [None]:
def results(metrics):
    """Build the pipeline step that runs the results container.

    Args:
        metrics: Value passed to the container right after the
            '--metrics' flag.

    Returns:
        A dsl.ContainerOp named 'Results' that runs image
        'mavencodevv/results_los:v.0.1'. No file outputs are declared.
    """
    container_args = ['--metrics', metrics]
    return dsl.ContainerOp(
        name='Results',
        image='mavencodevv/results_los:v.0.1',
        arguments=container_args,
    )

In [None]:
@dsl.pipeline(
    name='Hospital Length of Stay Pipeline',
    description='An ML reusable pipeline that predicts how long a patient will be admitted in the hospital'
)
def los_pipeline():
    """Wire the five container steps together: load, preprocess, train, test, results.

    The pipeline takes no parameters. Each step receives the declared file
    outputs of earlier steps wrapped in dsl.InputArgumentPath, and is also
    explicitly ordered after its predecessor with .after().
    """
    load_step = load_data_op()

    raw_data = dsl.InputArgumentPath(load_step.outputs['data'])
    preprocess_step = preprocess_op(raw_data).after(load_step)

    training_data = dsl.InputArgumentPath(preprocess_step.outputs['clean_data'])
    train_step = train_op(training_data).after(preprocess_step)

    # The test step consumes both the cleaned data and the fitted model.
    evaluation_data = dsl.InputArgumentPath(preprocess_step.outputs['clean_data'])
    fitted_model = dsl.InputArgumentPath(train_step.outputs['fit_model'])
    test_step = test_op(evaluation_data, fitted_model).after(train_step)

    # Final step; its op object is not needed afterwards, so it is not bound to a name.
    metrics_file = dsl.InputArgumentPath(test_step.outputs['metrics'])
    results(metrics_file).after(test_step)
    

In [None]:
# Compile los_pipeline into a pipeline definition file named
# '<experiment_name>.yaml' (here: 'length_of_stay_pipeline.yaml').
experiment_name = 'length_of_stay_pipeline'
kfp.compiler.Compiler().compile(los_pipeline, f'{experiment_name}.yaml')



In [None]:
# Create a Kubeflow Pipelines client with default settings and submit a run
# built directly from the pipeline function. The pipeline takes no parameters,
# so the arguments mapping is empty.
client = kfp.Client()
# Must reference los_pipeline, the pipeline function defined in this notebook;
# the previous name `ld_pipeline` was never defined and raised NameError.
client.create_run_from_pipeline_func(los_pipeline, arguments={})