In [1]:
# Upgrade pip for the interpreter backing this kernel. %pip (rather than
# `!python -m pip`) guarantees the kernel's own Python is the one updated,
# even when `python` on the shell PATH points somewhere else.
%pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |▏                               | 10kB 15.8MB/s eta 0:00:01[K     |▍                               | 20kB 22.0MB/s eta 0:00:01[K     |▋                               | 30kB 26.5MB/s eta 0:00:01[K     |▉                               | 40kB 29.9MB/s eta 0:00:01[K     |█                               | 51kB 31.8MB/s eta 0:00:01[K     |█▎                              | 61kB 24.0MB/s eta 0:00:01[K     |█▌                              | 71kB 23.4MB/s eta 0:00:01[K     |█▊                              | 81kB 18.3MB/s eta 0:00:01[K     |██                              | 92kB 18.9MB/s eta 0:00:01[K     |██▏                             | 102kB 18.6MB/s eta 0:00:01[K     |██▍                             | 112kB 18.6MB/s eta 0:00:01[K     |██▌                             | 122kB 18.6MB/s et

In [2]:
# Install the Kubeflow Pipelines SDK into the kernel's environment.
# The version is pinned to the release this notebook was run against: the
# cells below use the v1 SDK API (dsl.ContainerOp, dsl.InputArgumentPath),
# so an unpinned upgrade to a later major release would break them.
# %pip is used instead of calling `pip3` directly, which is what triggered
# the "invoke Python with '-m pip'" warning in the recorded output.
%pip install --user --upgrade kfp==1.4.0

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 18.3 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 39.0 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 40.7 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.2 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.4.1.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 5.9 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3

In [11]:
import kfp
from kfp import dsl
import kfp.components as comp

In [12]:
def load_data_op():
    """Create the pipeline step that loads the data.

    Returns:
        A ``dsl.ContainerOp`` named 'Load Data' that runs the
        ``mavencodevv/load_nba:v.0.1`` image with no command-line arguments
        and declares the container file ``/load_data/data`` as its ``data``
        output.
    """
    step_spec = dict(
        name='Load Data',
        image='mavencodevv/load_nba:v.0.1',
        arguments=[],
        file_outputs={'data': '/load_data/data'},
    )
    return dsl.ContainerOp(**step_spec)

In [13]:
def preprocess_op(data):
    """Create the pipeline step that preprocesses the loaded data.

    Args:
        data: Value forwarded to the container after the ``--data`` flag.
            In this notebook the pipeline passes a ``dsl.InputArgumentPath``
            wrapping the load step's ``data`` output.

    Returns:
        A ``dsl.ContainerOp`` named 'Preprocess Data' that runs the
        ``mavencodevv/preprocess_nba:v.0.1`` image and declares the container
        file ``/preprocess/clean_data`` as its ``clean_data`` output.
    """
    cli_args = ['--data', data]
    declared_outputs = {'clean_data': '/preprocess/clean_data'}
    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodevv/preprocess_nba:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [14]:
def rf_op(clean_data):
    """Create the 'Randomforest' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag. In this notebook the pipeline passes a
            ``dsl.InputArgumentPath`` wrapping the preprocess step's
            ``clean_data`` output.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/rf_nba:v.0.2`` image
        and declares the container file ``/randomforest/rf_metrics`` as its
        ``rf_metrics`` output.
    """
    metrics_output = {'rf_metrics': '/randomforest/rf_metrics'}
    return dsl.ContainerOp(
        name='Randomforest',
        image='mavencodevv/rf_nba:v.0.2',
        arguments=['--clean_data', clean_data],
        file_outputs=metrics_output,
    )

In [15]:
def gbc_op(clean_data):
    """Create the 'GBC' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag. In this notebook the pipeline passes a
            ``dsl.InputArgumentPath`` wrapping the preprocess step's
            ``clean_data`` output.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/gbc_nba:v.0.2``
        image and declares the container file ``/gbc/gbc_metrics`` as its
        ``gbc_metrics`` output.
    """
    container_args = ['--clean_data', clean_data]
    op_kwargs = {
        'name': 'GBC',
        'image': 'mavencodevv/gbc_nba:v.0.2',
        'arguments': container_args,
        'file_outputs': {'gbc_metrics': '/gbc/gbc_metrics'},
    }
    return dsl.ContainerOp(**op_kwargs)

In [16]:
def lr_op(clean_data):
    """Create the 'Logistic Regression' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag. In this notebook the pipeline passes a
            ``dsl.InputArgumentPath`` wrapping the preprocess step's
            ``clean_data`` output.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/lr_nba:v.0.2`` image
        and declares the container file ``/lr/lr_metrics`` as its
        ``lr_metrics`` output.
    """
    step = dsl.ContainerOp(
        name='Logistic Regression',
        image='mavencodevv/lr_nba:v.0.2',
        arguments=['--clean_data', clean_data],
        file_outputs={'lr_metrics': '/lr/lr_metrics'},
    )
    return step

In [17]:
def results(rf_metrics, gbc_metrics, lr_metrics):
    """Create the 'Results' pipeline step that receives all three metrics.

    Args:
        rf_metrics: Value forwarded after the ``--rf_metrics`` flag.
        gbc_metrics: Value forwarded after the ``--gbc_metrics`` flag.
        lr_metrics: Value forwarded after the ``--lr_metrics`` flag.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodevv/results_nba:v.0.1``
        image. No ``file_outputs`` are declared for this step.
    """
    flag_values = (
        ('--rf_metrics', rf_metrics),
        ('--gbc_metrics', gbc_metrics),
        ('--lr_metrics', lr_metrics),
    )
    # Flatten the (flag, value) pairs into the flat argv list the container expects.
    container_args = [token for pair in flag_values for token in pair]
    return dsl.ContainerOp(
        name='Results',
        image='mavencodevv/results_nba:v.0.1',
        arguments=container_args,
    )

In [18]:
# Pipeline definition. It takes no parameters: each step's input is wired
# from an output of an upstream step.
@dsl.pipeline(
    name='Shot Result Prediction',
    description='An ML reusable pipeline that predicts whether a shot from an NBA player will go in or not'
)
def nba_pipeline():
    # Step 1: load the data.
    load_step = load_data_op()

    # Step 2: preprocess the 'data' output of the load step.
    loaded_data = dsl.InputArgumentPath(load_step.outputs['data'])
    preprocess_step = preprocess_op(loaded_data).after(load_step)

    # Step 3: run the three model steps, each fed its own reference to the
    # preprocess step's 'clean_data' output.
    def clean_data_input():
        return dsl.InputArgumentPath(preprocess_step.outputs['clean_data'])

    rf_step = rf_op(clean_data_input()).after(preprocess_step)
    gbc_step = gbc_op(clean_data_input()).after(preprocess_step)
    lr_step = lr_op(clean_data_input()).after(preprocess_step)

    # Step 4: hand the three metrics outputs to the results step, which is
    # explicitly ordered after all three model steps.
    rf_metrics = dsl.InputArgumentPath(rf_step.outputs['rf_metrics'])
    gbc_metrics = dsl.InputArgumentPath(gbc_step.outputs['gbc_metrics'])
    lr_metrics = dsl.InputArgumentPath(lr_step.outputs['lr_metrics'])
    results(rf_metrics, gbc_metrics, lr_metrics).after(rf_step, gbc_step, lr_step)
    

In [19]:
# Compile the pipeline function into a YAML pipeline definition. The output
# file is named after the experiment: '<experiment_name>.yaml'.
experiment_name = 'nba_pipeline'
pipeline_package_path = '{}.yaml'.format(experiment_name)
kfp.compiler.Compiler().compile(nba_pipeline, pipeline_package_path)



In [None]:
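# Optional: submit the pipeline for a run directly from the notebook.
# Uncomment both lines to launch it (requires a reachable Kubeflow Pipelines endpoint).
#client = kfp.Client()
#client.create_run_from_pipeline_func(nba_pipeline, arguments={})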