In [1]:
# Upgrade pip in the environment of the running kernel (%pip targets the
# kernel's interpreter, which a shell-escaped `python` is not guaranteed to do).
%pip install --user --upgrade pip

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 5.5MB/s 
[?25hInstalling collected packages: pip
Successfully installed pip-21.0.1


In [2]:
# Install the Kubeflow Pipelines SDK into the kernel's environment. The version
# is pinned to the release recorded in this cell's output so that a fresh run
# keeps the `dsl.ContainerOp` API the rest of the notebook is written against.
%pip install --user kfp==1.4.0

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Collecting kfp
  Downloading kfp-1.4.0.tar.gz (159 kB)
[K     |████████████████████████████████| 159 kB 5.8 MB/s 
[?25hCollecting PyYAML>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 49.8 MB/s 
Collecting kubernetes<12.0.0,>=8.0.0
  Downloading kubernetes-11.0.0-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 55.0 MB/s 
Collecting requests_toolbelt>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 1.7 MB/s 
Collecting kfp-server-api<2.0.0,>=1.1.2
  Downloading kfp-server-api-1.4.1.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 6.2 MB/s 
[?25hCollecting jsonschema>=3.0.1
  Downloading jsonschema-3.2.0-py2.py3-

In [33]:
import kfp
from kfp import dsl
import kfp.components as comp

In [34]:
def load_data_op():
    """Build the container step that loads the dataset.

    Returns:
        A ``dsl.ContainerOp`` named 'Load Data' that is started with no
        command-line arguments and declares a single file output, ``data``,
        mapped to ``/load_data/data``.
    """
    declared_outputs = {'data': '/load_data/data'}
    return dsl.ContainerOp(
        name='Load Data',
        image='mavencodevv/load_heart:v.0.1',
        arguments=[],
        file_outputs=declared_outputs,
    )

In [77]:
def stat_op(data):
    """Build the container step that generates dataset statistics.

    Args:
        data: Value passed to the container after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Dataset Statistics' declaring two file
        outputs: ``stats`` and ``mlpipeline-ui-metadata``.
    """
    cli_args = ['--data', data]
    declared_outputs = {
        'stats': '/statgen/stats',
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
    }
    return dsl.ContainerOp(
        name='Dataset Statistics',
        image='mavencodevv/stat_heart:v.0.18',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [78]:
def schema_op(stats):
    """Build the container step that derives a schema from dataset statistics.

    Args:
        stats: Value passed to the container after the ``--stats`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Dataset Schema' declaring two file
        outputs: ``schema`` and ``mlpipeline-ui-metadata``.
    """
    cli_args = ['--stats', stats]
    declared_outputs = {
        'schema': '/schema/schema',
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
    }
    return dsl.ContainerOp(
        name='Dataset Schema',
        image='mavencodevv/schema_heart:v.0.2',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [105]:
def val_op(stats, schema):
    """Build the container step that validates the dataset.

    Args:
        stats: Value passed to the container after the ``--stats`` flag.
        schema: Value passed to the container after the ``--schema`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Dataset Validation' whose only declared
        file output is ``mlpipeline-ui-metadata``.
    """
    cli_args = ['--stats', stats, '--schema', schema]
    declared_outputs = {
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
    }
    return dsl.ContainerOp(
        name='Dataset Validation',
        image='mavencodevv/valid_heart:v.0.3',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [106]:
def preprocess_op(data):
    """Build the container step that preprocesses the dataset.

    Args:
        data: Value passed to the container after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` named 'Preprocess Data' declaring one file
        output, ``clean_data``, mapped to ``/preprocess/clean_data``.
    """
    cli_args = ['--data', data]
    declared_outputs = {'clean_data': '/preprocess/clean_data'}
    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavencodevv/preprocess_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [107]:
def rf_op(clean_data):
    """Build the container step for the random-forest model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'Randomforest' declaring one file output,
        ``rf_metrics``, mapped to ``/random/rf_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'rf_metrics': '/random/rf_metrics'}
    return dsl.ContainerOp(
        name='Randomforest',
        image='mavencodevv/rf_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [108]:
def lr_op(clean_data):
    """Build the container step for the logistic-regression model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'Logistic Regression' declaring one file
        output, ``lr_metrics``, mapped to ``/logistic/lr_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'lr_metrics': '/logistic/lr_metrics'}
    return dsl.ContainerOp(
        name='Logistic Regression',
        image='mavencodevv/logistic_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [109]:
def keras_op(clean_data):
    """Build the container step for the Keras model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'Keras Model' declaring one file output,
        ``keras_metrics``, mapped to ``/keras_model/keras_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'keras_metrics': '/keras_model/keras_metrics'}
    return dsl.ContainerOp(
        name='Keras Model',
        image='mavencodevv/km_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [110]:
def cb_op(clean_data):
    """Build the container step for the CatBoost model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'CatBoost' declaring one file output,
        ``cb_metrics``, mapped to ``/cb/cb_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'cb_metrics': '/cb/cb_metrics'}
    return dsl.ContainerOp(
        name='CatBoost',
        image='mavencodevv/cb_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [111]:
def knn_op(clean_data):
    """Build the container step for the KNN model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'KNN model' declaring one file output,
        ``knn_metrics``, mapped to ``/knn/knn_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'knn_metrics': '/knn/knn_metrics'}
    return dsl.ContainerOp(
        name='KNN model',
        image='mavencodevv/knn_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [112]:
def sv_op(clean_data):
    """Build the container step for the SVC model.

    Args:
        clean_data: Value passed to the container after ``--clean_data``.

    Returns:
        A ``dsl.ContainerOp`` named 'SVC model' declaring one file output,
        ``sv_metrics``, mapped to ``/sv/sv_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    declared_outputs = {'sv_metrics': '/sv/sv_metrics'}
    return dsl.ContainerOp(
        name='SVC model',
        image='mavencodevv/sv_heart:v.0.1',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [113]:
def eval_op(rf_metrics,keras_metrics,lr_metrics,sv_metrics, knn_metrics,cb_metrics):
    """Build the container step that evaluates the trained models.

    Each parameter is passed to the container after the flag of the same
    name (``--rf_metrics``, ``--keras_metrics``, ``--lr_metrics``,
    ``--sv_metrics``, ``--knn_metrics``, ``--cb_metrics``), in that order.

    Returns:
        A ``dsl.ContainerOp`` named 'Model Evaluation' declaring one file
        output, ``best_model``, mapped to ``/eval/best_model``.
    """
    flag_value_pairs = (
        ('--rf_metrics', rf_metrics),
        ('--keras_metrics', keras_metrics),
        ('--lr_metrics', lr_metrics),
        ('--sv_metrics', sv_metrics),
        ('--knn_metrics', knn_metrics),
        ('--cb_metrics', cb_metrics),
    )
    # Flatten the (flag, value) pairs into the flat argv list the container expects.
    cli_args = []
    for flag, value in flag_value_pairs:
        cli_args.extend((flag, value))

    declared_outputs = {'best_model': '/eval/best_model'}
    return dsl.ContainerOp(
        name='Model Evaluation',
        image='mavencodevv/eval_heart:v.0.3',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )

In [114]:
def push_op(bucket_name,credentials,best_model):
    """Build the container step that exports the model to cloud storage.

    Args:
        bucket_name: Value passed to the container after ``--bucket_name``.
        credentials: Value passed to the container after ``--credentials``.
        best_model: Value passed to the container after ``--best_model``.

    Returns:
        A ``dsl.ContainerOp`` named 'Export Model to Cloud Storage'. No file
        outputs are declared for this step.
    """
    # NOTE(review): the image reference carries no tag, and `credentials` is
    # handed over as a plain command-line argument - confirm both are intended.
    cli_args = [
        '--bucket_name', bucket_name,
        '--credentials', credentials,
        '--best_model', best_model,
    ]
    return dsl.ContainerOp(
        name='Export Model to Cloud Storage',
        image='mavencodevv/push_heart',
        arguments=cli_args,
    )

In [115]:
@dsl.pipeline(
    name='Heart Attack Prediction',
    description='An ML reusable pipeline that predicts the chances of a patient having heart attack'
)
def heart_pipeline(bucket_name, credentials):
    """Wire the heart-attack prediction steps into a single pipeline.

    Step order: load data, dataset statistics, schema, validation,
    preprocessing, six model-training steps that all read the same cleaned
    data, model evaluation, and finally export of the selected model.

    Args:
        bucket_name: Pipeline parameter forwarded to the export step.
        credentials: Pipeline parameter forwarded to the export step.
    """
    load_task = load_data_op()

    stat_task = stat_op(
        dsl.InputArgumentPath(load_task.outputs['data'])
    ).after(load_task)

    schema_task = schema_op(
        dsl.InputArgumentPath(stat_task.outputs['stats'])
    ).after(stat_task)

    val_task = val_op(
        dsl.InputArgumentPath(stat_task.outputs['stats']),
        dsl.InputArgumentPath(schema_task.outputs['schema']),
    ).after(stat_task, schema_task)

    # Preprocessing consumes the raw data, but is explicitly ordered after
    # validation so it only starts once the validation step has finished.
    preprocess_task = preprocess_op(
        dsl.InputArgumentPath(load_task.outputs['data'])
    ).after(load_task, val_task)

    def clean_data_path():
        # Every training step reads the same preprocessed artifact.
        return dsl.InputArgumentPath(preprocess_task.outputs['clean_data'])

    rf_task = rf_op(clean_data_path()).after(preprocess_task)
    keras_task = keras_op(clean_data_path()).after(preprocess_task)
    lr_task = lr_op(clean_data_path()).after(preprocess_task)
    cb_task = cb_op(clean_data_path()).after(preprocess_task)
    knn_task = knn_op(clean_data_path()).after(preprocess_task)
    sv_task = sv_op(clean_data_path()).after(preprocess_task)

    # Metrics are passed by keyword: eval_op declares its parameters in the
    # order (rf, keras, lr, sv, knn, cb), so a positional call listing them as
    # (rf, keras, lr, cb, knn, sv) would swap the CatBoost and SVC metrics.
    eval_task = eval_op(
        rf_metrics=dsl.InputArgumentPath(rf_task.outputs['rf_metrics']),
        keras_metrics=dsl.InputArgumentPath(keras_task.outputs['keras_metrics']),
        lr_metrics=dsl.InputArgumentPath(lr_task.outputs['lr_metrics']),
        sv_metrics=dsl.InputArgumentPath(sv_task.outputs['sv_metrics']),
        knn_metrics=dsl.InputArgumentPath(knn_task.outputs['knn_metrics']),
        cb_metrics=dsl.InputArgumentPath(cb_task.outputs['cb_metrics']),
    ).after(rf_task, keras_task, lr_task, cb_task, knn_task, sv_task)

    push_op(
        bucket_name,
        credentials,
        dsl.InputArgumentPath(eval_task.outputs['best_model']),
    ).after(eval_task)
 

In [116]:
# Compile heart_pipeline into a YAML pipeline package; the output file is
# named after `experiment_name`.
experiment_name = 'heart_pipeline'

kfp.compiler.Compiler().compile(
    pipeline_func=heart_pipeline,
    package_path=f'{experiment_name}.yaml',
)



In [None]:
# Create a Kubeflow Pipelines client without passing an explicit host or
# credentials.
client = kfp.Client()
# Run submission is left disabled. heart_pipeline declares `bucket_name` and
# `credentials` without defaults, so the empty `arguments` dict below would
# need both values filled in before this line is enabled.
#client.create_run_from_pipeline_func(heart_pipeline, arguments={})