In [1]:
#!python -m pip install --user --upgrade pip

Collecting pip
  Using cached pip-21.0-py3-none-any.whl (1.5 MB)
  Downloading pip-20.3.4-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 6.8 MB/s eta 0:00:01
[?25h

In [2]:
#!pip3 install kfp --upgrade --user

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Using cached kfp-1.3.0-py3-none-any.whl
  Downloading kfp-1.2.0.tar.gz (165 kB)
[K     |████████████████████████████████| 165 kB 7.4 MB/s eta 0:00:01


### After installing the necessary packages, please restart the kernel before continuing

In [1]:
import kfp
from kfp import dsl
import kfp.components as comp

### Defining the component that obtains data from the source

In [2]:
def obtain_data_op():
    """Create the pipeline step that obtains the source data.

    Returns:
        A ``dsl.ContainerOp`` named 'Obtain Data' that uses the
        ``mavencodev/obtain-data-component:v.0.1`` image, receives no
        arguments, and declares one file output, ``data``, located at
        ``/obtain_data/data``.
    """
    # Output name -> file path declared for that output.
    produced_files = {'data': '/obtain_data/data'}

    return dsl.ContainerOp(
        name='Obtain Data',
        image='mavencodev/obtain-data-component:v.0.1',
        arguments=[],
        file_outputs=produced_files,
    )

### Defining the component that handles data preprocessing

In [3]:
def preprocess_op(data):
    """Create the pipeline step that preprocesses the obtained data.

    Args:
        data: Value passed to the container right after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Preprocess Data' that uses the
        ``mavencodev/preprocess-component:v.0.1`` image and declares four file
        outputs (``X_train``, ``X_test``, ``y_train``, ``y_test``), each
        located at ``/preprocess_data/<name>.npy``.
    """
    # Every output is stored as <name>.npy under the same directory.
    split_names = ('X_train', 'X_test', 'y_train', 'y_test')
    produced_files = {
        split: '/preprocess_data/{}.npy'.format(split) for split in split_names
    }

    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodev/preprocess-component:v.0.1',
        arguments=['--data', data],
        file_outputs=produced_files,
    )

### Defining the component that trains the model

In [4]:
def train_op(X_train, y_train):
    """Create the pipeline step that trains the model.

    Args:
        X_train: Value passed to the container after the ``--X_train`` flag.
        y_train: Value passed to the container after the ``--y_train`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Train data' that uses the
        ``mavencodev/train-pytorch:v.0.1`` image and declares one file output,
        ``model``, located at ``/train_pytorch/pyclassifier.pt``.
    """
    # Flag/value pairs, in the order they are handed to the container.
    cli_args = ['--X_train', X_train, '--y_train', y_train]
    produced_files = {'model': '/train_pytorch/pyclassifier.pt'}

    return dsl.ContainerOp(
        name='Train data',
        image='mavencodev/train-pytorch:v.0.1',
        arguments=cli_args,
        file_outputs=produced_files,
    )

### Defining the component that handles predictions

In [5]:
def predict_op(X_test, y_test, model):
    """Create the pipeline step that handles predictions.

    Args:
        X_test: Value passed to the container after the ``--X_test`` flag.
        y_test: Value passed to the container after the ``--y_test`` flag.
        model: Value passed to the container after the ``--model`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Predict data' that uses the
        ``mavencodev/predict-pytorch:v.0.1`` image and declares one file
        output, ``results``, located at ``/predict_pytorch/results.txt``.
    """
    # Flag/value pairs, in the order they are handed to the container.
    cli_args = [
        '--X_test', X_test,
        '--y_test', y_test,
        '--model', model,
    ]
    produced_files = {'results': '/predict_pytorch/results.txt'}

    return dsl.ContainerOp(
        name='Predict data',
        image='mavencodev/predict-pytorch:v.0.1',
        arguments=cli_args,
        file_outputs=produced_files,
    )

### Defining the pipeline and how data is passed between components

In [8]:
@dsl.pipeline(
    name='Churn modelling pipeline',
    description='An ML reusable pipeline that performs customer segmentation to determine customers with high risk of leaving a bank .'
)
def churn_reuseable_pytorch_pipeline():
    # Wires the four steps together: obtain -> preprocess -> train -> predict.
    # The pipeline itself takes no parameters. Each upstream output is wrapped
    # in dsl.InputArgumentPath before being handed to the next step, and every
    # step is additionally ordered after its predecessor with .after().

    # Step 1: obtain the raw data.
    fetch_step = obtain_data_op()

    # Step 2: preprocess the obtained data.
    raw_data = dsl.InputArgumentPath(fetch_step.outputs['data'])
    prep_step = preprocess_op(raw_data)
    prep_step.after(fetch_step)

    # Step 3: train on the training split.
    prep_outputs = prep_step.outputs
    fit_step = train_op(
        dsl.InputArgumentPath(prep_outputs['X_train']),
        dsl.InputArgumentPath(prep_outputs['y_train']),
    )
    fit_step.after(prep_step)

    # Step 4: predict on the test split using the trained model.
    score_step = predict_op(
        dsl.InputArgumentPath(prep_outputs['X_test']),
        dsl.InputArgumentPath(prep_outputs['y_test']),
        dsl.InputArgumentPath(fit_step.outputs['model']),
    )
    score_step.after(fit_step)
    

### Compiling the pipeline

In [9]:
# Compile the pipeline function into a YAML definition file named after the
# experiment (<experiment_name>.yaml).
experiment_name = 'churn_analysis_pytorch_pipeline'
kfp.compiler.Compiler().compile(
    churn_reuseable_pytorch_pipeline,
    experiment_name + '.yaml',
)

### Running the pipeline

In [10]:
# Create a client with its default settings and submit the pipeline function
# as a run. The pipeline declares no parameters, so the arguments mapping is
# empty. The call is left as the last expression so the cell shows its result.
client = kfp.Client()
client.create_run_from_pipeline_func(
    churn_reuseable_pytorch_pipeline,
    arguments=dict(),
)

RunPipelineResult(run_id=f28c725a-e58b-4694-9e1b-ccda1ec9be05)