In [None]:
#!python -m pip install --user --upgrade pip

In [None]:
import os

import kfp
from kfp import dsl

### Component that obtains the data

In [None]:
def obtain_data_op(*, image=None):
    """Build the pipeline step that obtains the raw data.

    Args:
        image: Container image to run for this step. When omitted, the value
            of the ``OBTAIN_DATA_IMAGE`` environment variable is used.

    Returns:
        A ``dsl.ContainerOp`` named ``'Obtain Data'`` whose ``data`` output is
        read from ``/obtain_data/data`` inside the container.

    Raises:
        ValueError: If no image is passed and ``OBTAIN_DATA_IMAGE`` is unset
            or empty.
    """
    # The image is deployment-specific, so it is resolved at call time rather
    # than hard-coded; fail loudly instead of compiling a step with no image.
    image = image or os.environ.get('OBTAIN_DATA_IMAGE')
    if not image:
        raise ValueError(
            "No container image configured for the 'Obtain Data' step: pass "
            "image=... or set the OBTAIN_DATA_IMAGE environment variable."
        )

    return dsl.ContainerOp(
        name='Obtain Data',
        image=image,
        arguments=[],
        file_outputs={
            'data': '/obtain_data/data',
        },
    )

### Component that preprocesses the data

In [None]:
def preprocess_op(data, *, image=None):
    """Build the pipeline step that preprocesses the obtained data.

    Args:
        data: Value passed to the container after the ``--data`` flag
            (the pipeline supplies the output of the obtain-data step).
        image: Container image to run for this step. When omitted, the value
            of the ``PREPROCESS_IMAGE`` environment variable is used.

    Returns:
        A ``dsl.ContainerOp`` named ``'Preprocess Data'`` whose ``clean_data``
        output is read from ``/preprocess_data/clean_data`` inside the
        container.

    Raises:
        ValueError: If no image is passed and ``PREPROCESS_IMAGE`` is unset
            or empty.
    """
    # The image is deployment-specific, so it is resolved at call time rather
    # than hard-coded; fail loudly instead of compiling a step with no image.
    image = image or os.environ.get('PREPROCESS_IMAGE')
    if not image:
        raise ValueError(
            "No container image configured for the 'Preprocess Data' step: "
            "pass image=... or set the PREPROCESS_IMAGE environment variable."
        )

    return dsl.ContainerOp(
        name='Preprocess Data',
        image=image,
        arguments=[
            '--data', data,
        ],
        file_outputs={
            'clean_data': '/preprocess_data/clean_data',
        },
    )

### Component that trains the model

In [None]:
def train_op(clean_data, *, image=None):
    """Build the pipeline step that trains the model.

    The step takes a single input, the preprocessed data, which matches both
    the ``--clean_data`` flag handed to the container and the way the
    pipeline invokes this factory (one positional argument).

    Args:
        clean_data: Value passed to the container after the ``--clean_data``
            flag (the pipeline supplies the output of the preprocess step).
        image: Container image to run for this step. When omitted, the value
            of the ``TRAIN_IMAGE`` environment variable is used.

    Returns:
        A ``dsl.ContainerOp`` named ``'Train Model'`` exposing three outputs
        read from inside the container: ``X_test``
        (``/train_data/X_test.npy``), ``y_test`` (``/train_data/y_test.npy``)
        and ``model`` (``/train_data/classifier.h5``).

    Raises:
        ValueError: If no image is passed and ``TRAIN_IMAGE`` is unset
            or empty.
    """
    # The image is deployment-specific, so it is resolved at call time rather
    # than hard-coded; fail loudly instead of compiling a step with no image.
    image = image or os.environ.get('TRAIN_IMAGE')
    if not image:
        raise ValueError(
            "No container image configured for the 'Train Model' step: pass "
            "image=... or set the TRAIN_IMAGE environment variable."
        )

    return dsl.ContainerOp(
        name='Train Model',
        image=image,
        arguments=[
            '--clean_data', clean_data,
        ],
        file_outputs={
            'X_test': '/train_data/X_test.npy',
            'y_test': '/train_data/y_test.npy',
            'model': '/train_data/classifier.h5',
        },
    )

### Component that predicts on the test data

In [None]:
def predict_op(X_test, y_test, model, *, image=None):
    """Build the pipeline step that runs the trained model on the test data.

    Args:
        X_test: Value passed to the container after the ``--X_test`` flag.
        y_test: Value passed to the container after the ``--y_test`` flag.
        model: Value passed to the container after the ``--model`` flag.
        image: Container image to run for this step. When omitted, the value
            of the ``PREDICT_IMAGE`` environment variable is used.

    Returns:
        A ``dsl.ContainerOp`` named ``'Predict Model'`` whose ``results``
        output is read from ``/predict_data/results`` inside the container.

    Raises:
        ValueError: If no image is passed and ``PREDICT_IMAGE`` is unset
            or empty.
    """
    # The image is deployment-specific, so it is resolved at call time rather
    # than hard-coded; fail loudly instead of compiling a step with no image.
    image = image or os.environ.get('PREDICT_IMAGE')
    if not image:
        raise ValueError(
            "No container image configured for the 'Predict Model' step: pass "
            "image=... or set the PREDICT_IMAGE environment variable."
        )

    return dsl.ContainerOp(
        name='Predict Model',
        image=image,
        arguments=[
            '--X_test', X_test,
            '--y_test', y_test,
            '--model', model,
        ],
        file_outputs={
            'results': '/predict_data/results',
        },
    )

### Defining pipeline and including its components

In [None]:
@dsl.pipeline(
    name='Churn modelling pipeline',
    description='An ML reusable pipeline that performs customer segmentation to determine customers with high risk of leaving a bank .',
)
def churn_reuseable_pipeline():
    """Chain the four steps: obtain data, preprocess, train, predict.

    The pipeline takes no parameters. Each step receives the outputs of the
    step before it, wrapped in ``dsl.InputArgumentPath``, and is explicitly
    ordered after that step with ``.after()``.
    """
    # Step 1: obtain the data.
    fetch_step = obtain_data_op()

    # Step 2: preprocess the obtained data.
    raw_data = dsl.InputArgumentPath(fetch_step.outputs['data'])
    clean_step = preprocess_op(raw_data).after(fetch_step)

    # Step 3: train on the preprocessed data.
    clean_data = dsl.InputArgumentPath(clean_step.outputs['clean_data'])
    fit_step = train_op(clean_data).after(clean_step)

    # Step 4: predict using the test split and model emitted by training.
    test_features = dsl.InputArgumentPath(fit_step.outputs['X_test'])
    test_labels = dsl.InputArgumentPath(fit_step.outputs['y_test'])
    fitted_model = dsl.InputArgumentPath(fit_step.outputs['model'])
    predict_op(test_features, test_labels, fitted_model).after(fit_step)

In [None]:
# Compile the pipeline function into a package file named after the
# experiment ('<experiment_name>.zip').
experiment_name = 'churn_analysis_pipeline'
kfp.compiler.Compiler().compile(
    pipeline_func=churn_reuseable_pipeline,
    package_path=f'{experiment_name}.zip',
)

### Running the pipeline

In [None]:
# Create a Kubeflow Pipelines client with its default connection settings.
client = kfp.Client()

# Submit a run built directly from the pipeline function; the pipeline
# declares no parameters, so the arguments mapping is empty.
client.create_run_from_pipeline_func(
    pipeline_func=churn_reuseable_pipeline,
    arguments={},
)