In [None]:
# Upgrade pip itself (user-level install) before installing the notebook's dependencies.
!python -m pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 5.5MB/s 
[?25hInstalling collected packages: pip
Successfully installed pip-21.0.1


In [None]:
!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 5.7 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 16.2 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 13.7 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.1 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.4.1.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 5.1 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3-

In [None]:
import kfp
from kfp import dsl
import kfp.components as comp

In [None]:
def load_data_op():
    """Build the 'Load Data' container step.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/load_ca:v.0.1`` image
        with no arguments and declares one file output, ``data``, located at
        ``/load_data/data`` inside the container.
    """
    output_files = {'data': '/load_data/data'}
    return dsl.ContainerOp(
        name='Load Data',
        image='mavencodevv/load_ca:v.0.1',
        arguments=[],
        file_outputs=output_files,
    )

In [None]:
def preprocess_op(data):
    """Build the 'Preprocess Data' container step.

    Args:
        data: Value handed to the container right after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/preprocess:v.0.1``
        image and declares one file output, ``clean_data``, located at
        ``/preprocessing/clean_data`` inside the container.
    """
    container_args = ['--data', data]
    output_files = {'clean_data': '/preprocessing/clean_data'}
    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodevv/preprocess:v.0.1',
        arguments=container_args,
        file_outputs=output_files,
    )

In [None]:
def train_op(clean_data):
    """Build the 'Train data' container step.

    Args:
        clean_data: Value handed to the container right after the
            ``--clean_data`` flag.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/train:v.0.1`` image
        and declares one file output, ``fit_model``, located at
        ``/train_model/fit_model`` inside the container.
    """
    container_args = ['--clean_data', clean_data]
    output_files = {'fit_model': '/train_model/fit_model'}
    return dsl.ContainerOp(
        name='Train data',
        image='mavencodevv/train:v.0.1',
        arguments=container_args,
        file_outputs=output_files,
    )

In [None]:
def test_op(clean_data, fit_model):
    return dsl.ContainerOp(
        name = 'Test model',
        image = 'mavencodevv/test_ca:v.0.1',
        arguments = ['--clean_data', clean_data,
                    '--fit_model', fit_model
            ],
        file_outputs={
            'metrics': '/test_model/metrics' 
        }
    )

In [None]:
def results(metrics):
    """Build the 'Results' container step.

    Args:
        metrics: Value handed to the container right after the ``--metrics``
            flag.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/metrics_ld:v.0.1``
        image. No file outputs are declared for this step.
    """
    container_args = ['--metrics', metrics]
    return dsl.ContainerOp(
        name='Results',
        image='mavencodevv/metrics_ld:v.0.1',
        arguments=container_args,
    )

In [None]:
@dsl.pipeline(
    name='Churn classiifer pipeline',
    description='An ML reusable pipeline that performs customer segmentation to determine customers with high risk of leaving a telcommunication company .'
)
def churn_classifier_pipeline():
    """Wire the load, preprocess, train, test and results steps into one chain.

    The pipeline takes no parameters. Each step is given its predecessor's
    file output wrapped in ``dsl.InputArgumentPath`` and is also explicitly
    ordered with ``.after()``. The test step receives both the ``clean_data``
    output of preprocessing and the ``fit_model`` output of training.
    """
    load_step = load_data_op()

    raw_data = dsl.InputArgumentPath(load_step.outputs['data'])
    preprocess_step = preprocess_op(raw_data).after(load_step)

    train_step = train_op(
        dsl.InputArgumentPath(preprocess_step.outputs['clean_data'])
    ).after(preprocess_step)

    test_step = test_op(
        dsl.InputArgumentPath(preprocess_step.outputs['clean_data']),
        dsl.InputArgumentPath(train_step.outputs['fit_model']),
    ).after(train_step)

    # Last step in the chain; nothing consumes it, so the op is not bound to a name.
    results(
        dsl.InputArgumentPath(test_step.outputs['metrics'])
    ).after(test_step)
    

In [None]:
# Compile the pipeline function into a YAML package named '<experiment_name>.yaml'.
experiment_name = 'churn_classifier_pipeline'
kfp.compiler.Compiler().compile(
    pipeline_func=churn_classifier_pipeline,
    package_path=experiment_name + '.yaml',
)



In [None]:
# Create a client with default settings and submit the pipeline function as a
# run with no arguments. The call is the cell's last expression, so its return
# value is displayed as the cell output.
client = kfp.Client()
client.create_run_from_pipeline_func(
    pipeline_func=churn_classifier_pipeline,
    arguments={},
)

RunPipelineResult(run_id=c0665dc5-a861-4430-b876-ae2ae1c0a4aa)