In [1]:
#!python -m pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/de/47/58b9f3e6f611dfd17fb8bd9ed3e6f93b7ee662fb85bdfee3565e8979ddf7/pip-21.0-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 7.5MB/s eta 0:00:01
[?25hInstalling collected packages: pip
Successfully installed pip-21.0
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
#!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.3.0.tar.gz (170 kB)
[K     |████████████████████████████████| 170 kB 4.9 MB/s eta 0:00:01
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 3.4 MB/s  eta 0:00:01
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.3.0.tar.gz (54 kB)
[K     |████████████████████████████████| 54 kB 4.5 MB/s  eta 0:00:01
Collecting tabulate
  Downloading tabulate-0.8.7-py3-none-any.whl (24 kB)
Collecting click
  Downloading click-7.1.2-py2.py3-none-any.whl (82 kB)
[K     |████████████████████████████████| 82 kB 1.3 MB/s  eta 0:00:01
[?25hCollecting Deprecated
  Downloading Deprecated-1.2.11-py2.py3-none-any.whl (9.1 kB)
Collecting strip-hints
  Downloading strip-hint

### After installing the necessary packages, restart the kernel before continuing

### Importing some Kubeflow Pipelines SDK packages

In [1]:
import kfp
from kfp import dsl
import kfp.components as comp

### Component that obtains the data from the source

In [2]:
def obtain_data_op():
    """Build the 'Obtain Data' pipeline step.

    Returns:
        dsl.ContainerOp: a step that runs the
        'mavencodev/obtain-data-component:v.0.1' image with no command-line
        arguments and declares a single file output, 'data', located at
        '/obtain_data/data' inside the container.
    """
    # Output name -> path of the file inside the container.
    declared_outputs = {
        'data': '/obtain_data/data',
    }
    step = dsl.ContainerOp(
        name='Obtain Data',
        image='mavencodev/obtain-data-component:v.0.1',
        arguments=[],
        file_outputs=declared_outputs,
    )
    return step

### Component that preprocesses the data from the obtain_data component

In [3]:
def preprocess_op(data):
    """Build the 'Preprocess Data' pipeline step.

    Args:
        data: value forwarded to the container after the '--data' flag.

    Returns:
        dsl.ContainerOp: a step that runs the
        'mavencodev/preprocess-component:v.0.1' image and declares four file
        outputs ('X_train', 'X_test', 'y_train', 'y_test'), each a '.npy'
        file under '/preprocess_data/'.
    """
    cli_args = ['--data', data]
    # Output name -> path of the file inside the container.
    declared_outputs = {
        'X_train': '/preprocess_data/X_train.npy',
        'X_test': '/preprocess_data/X_test.npy',
        'y_train': '/preprocess_data/y_train.npy',
        'y_test': '/preprocess_data/y_test.npy',
    }
    step = dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodev/preprocess-component:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )
    return step

### Component that trains the model using data from the preprocess component

In [4]:
def train_op(X_train, y_train):
    """Build the 'Train Model' pipeline step.

    Args:
        X_train: value forwarded to the container after the '--X_train' flag.
        y_train: value forwarded to the container after the '--y_train' flag.

    Returns:
        dsl.ContainerOp: a step that runs the
        'mavencodev/train-tensorflow:v.0.1' image and declares one file
        output, 'model', located at '/tf_data/classifier.h5'.
    """
    cli_args = ['--X_train', X_train, '--y_train', y_train]
    # Output name -> path of the file inside the container.
    declared_outputs = {
        'model': '/tf_data/classifier.h5',
    }
    step = dsl.ContainerOp(
        name='Train Model',
        image='mavencodev/train-tensorflow:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )
    return step

### Component that makes predictions using data from the preprocess component and the model from the train component

In [5]:
def predict_op(X_test, y_test, model):
    """Build the 'Predictions' pipeline step.

    Args:
        X_test: value forwarded to the container after the '--X_test' flag.
        y_test: value forwarded to the container after the '--y_test' flag.
        model: value forwarded to the container after the '--model' flag.

    Returns:
        dsl.ContainerOp: a step that runs the
        'mavencodev/predict-tensorflow:v.0.1' image and declares two file
        outputs under '/predict_data/': 'performance' (performance.txt) and
        'results' (results.txt).
    """
    cli_args = [
        '--X_test', X_test,
        '--y_test', y_test,
        '--model', model,
    ]
    # Output name -> path of the file inside the container.
    declared_outputs = {
        'performance': '/predict_data/performance.txt',
        'results': '/predict_data/results.txt',
    }
    step = dsl.ContainerOp(
        name='Predictions',
        image='mavencodev/predict-tensorflow:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )
    return step

### Defining the pipeline and how data is passed between components

In [6]:
@dsl.pipeline(
    name='Churn modelling pipeline',
    description='An ML reusable pipeline that performs customer segmentation to determine customers with high risk of leaving a bank .'
)
def churn_reuseable_tensorflow_pipeline():
    # Wires the four steps together: obtain data -> preprocess -> train -> predict.
    # The pipeline itself takes no parameters. Each downstream step receives
    # upstream outputs wrapped in dsl.InputArgumentPath and is additionally
    # ordered after its predecessor with an explicit .after() call.
    # (Documented with comments rather than a docstring so the function's
    # __doc__ stays exactly as it was.)

    # Step 1: obtain the data.
    fetch_step = obtain_data_op()

    # Step 2: preprocess the 'data' output of step 1.
    raw_data = dsl.InputArgumentPath(fetch_step.outputs['data'])
    prep_step = preprocess_op(raw_data)
    prep_step.after(fetch_step)

    # Step 3: train on the training outputs of step 2.
    train_features = dsl.InputArgumentPath(prep_step.outputs['X_train'])
    train_labels = dsl.InputArgumentPath(prep_step.outputs['y_train'])
    fit_step = train_op(train_features, train_labels)
    fit_step.after(prep_step)

    # Step 4: predict using the test outputs of step 2 and the model of step 3.
    test_features = dsl.InputArgumentPath(prep_step.outputs['X_test'])
    test_labels = dsl.InputArgumentPath(prep_step.outputs['y_test'])
    trained_model = dsl.InputArgumentPath(fit_step.outputs['model'])
    score_step = predict_op(test_features, test_labels, trained_model)
    score_step.after(fit_step)

### Compiling the pipeline

In [7]:
# Compile the pipeline function into a package file named after the
# experiment, i.e. '<experiment_name>.yaml'.
# (An earlier revision of this cell wrote '<experiment_name>.zip' instead.)
experiment_name = 'churn_analysis_tensorflow_pipeline'
kfp.compiler.Compiler().compile(
    pipeline_func=churn_reuseable_tensorflow_pipeline,
    package_path=experiment_name + '.yaml',
)



### Running the pipeline

In [8]:
# Create a Kubeflow Pipelines client with default settings.
client = kfp.Client()

# Submit a run built directly from the pipeline function. The pipeline takes
# no parameters, so the arguments mapping is empty. The call is left as the
# cell's last expression so its result is displayed as the cell output.
client.create_run_from_pipeline_func(
    pipeline_func=churn_reuseable_tensorflow_pipeline,
    arguments={},
)

RunPipelineResult(run_id=ccaa1514-fe65-410b-973a-2e7386340e45)