In [1]:
!python -m pip install --user --upgrade pip

Collecting pip
  Downloading pip-21.1.2-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 6.9 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.1
    Uninstalling pip-21.1.1:
      Successfully uninstalled pip-21.1.1
Successfully installed pip-21.1.2


In [2]:
!python -m pip install --user --upgrade pip



In [3]:
import kfp
from kfp import dsl
import kfp.components as comp

In [4]:
def load_data_op():
    """Build the 'Load Data' pipeline step.

    Returns:
        A ``dsl.ContainerOp`` that runs the ``mavencodev/load_customer:v.0.1``
        image with no command-line arguments and exposes the container file
        ``/load_data/data`` as the output named ``data``.
    """
    # Output name -> path inside the container the output is collected from.
    produced_files = {'data': '/load_data/data'}

    return dsl.ContainerOp(
        name='Load Data',
        image='mavencodev/load_customer:v.0.1',
        arguments=[],
        file_outputs=produced_files,
    )

In [5]:
def stat_op(data):
    """Build the 'Dataset Statistics' pipeline step.

    Args:
        data: Value forwarded to the container after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavendevv/stat2_customer:v.0.1`` with
        two file outputs: ``stats`` and ``mlpipeline-ui-metadata``.
    """
    # An alternative image, 'mavencodev/stat_customer:v.0.18', was left
    # commented out next to the active one in the original cell.
    container_image = 'mavendevv/stat2_customer:v.0.1'

    cli_args = ['--data', data]
    produced_files = {
        'stats': '/statgen/stats',
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
    }

    return dsl.ContainerOp(
        name='Dataset Statistics',
        image=container_image,
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [6]:
# Disabled earlier variant of schema_op: the definition is wrapped in a bare
# triple-quoted string, so it is evaluated as a string expression and never
# defines a function. It differs from the active schema_op defined later in
# this notebook by its image ('schema_customer6' instead of 'schema_customer8')
# and by declaring an extra 'mlpipeline-ui-metadata' file output.
'''def schema_op(stats):
    return dsl.ContainerOp(
        name = 'Dataset Schema',
        image = 'mavendevv/schema_customer6:v.0.2', #'mavendevv/schema5_cust:v.0.2',
        arguments = ['--stats', stats],
        file_outputs={
            'schema': '/schema/schema',
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
        }      
    )'''

"def schema_op(stats):\n    return dsl.ContainerOp(\n        name = 'Dataset Schema',\n        image = 'mavendevv/schema_customer6:v.0.2', #'mavendevv/schema5_cust:v.0.2',\n        arguments = ['--stats', stats],\n        file_outputs={\n            'schema': '/schema/schema',\n            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'\n        }      \n    )"

In [7]:
def schema_op(stats):
    """Build the 'Dataset Schema' pipeline step.

    Args:
        stats: Value forwarded to the container after the ``--stats`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavendevv/schema_customer8:v.0.2``
        whose single file output ``schema`` is read from ``/schema/schema``.
    """
    # An alternative image, 'mavendevv/schema5_cust:v.0.2', was left commented
    # out next to the active one in the original cell.
    container_image = 'mavendevv/schema_customer8:v.0.2'

    return dsl.ContainerOp(
        name='Dataset Schema',
        image=container_image,
        arguments=['--stats', stats],
        file_outputs={'schema': '/schema/schema'},
    )

In [8]:
#mavendevv/schema_customer2:v.0.2

In [9]:
def val_op(stats, schema):
    """Build the 'Dataset Validation' pipeline step.

    Args:
        stats: Value forwarded to the container after the ``--stats`` flag.
        schema: Value forwarded to the container after the ``--schema`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavencodev/valid_customer:v.0.3``
        whose only file output is ``mlpipeline-ui-metadata``.
    """
    cli_args = [
        '--stats', stats,
        '--schema', schema,
    ]
    produced_files = {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'}

    return dsl.ContainerOp(
        name='Dataset Validation',
        image='mavencodev/valid_customer:v.0.3',
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [10]:
def preprocess_op(data):
    """Build the 'Preprocess Data' pipeline step.

    Args:
        data: Value forwarded to the container after the ``--data`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavendevv/preprocess_customer2:v.0.2``
        whose file output ``clean_data`` is read from
        ``/preprocess/clean_data``.
    """
    produced_files = {'clean_data': '/preprocess/clean_data'}

    return dsl.ContainerOp(
        name='Preprocess Data',
        image='mavendevv/preprocess_customer2:v.0.2',
        arguments=['--data', data],
        file_outputs=produced_files,
    )

In [11]:
def lr_op(clean_data):
    """Build the 'Logistic Regression' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavendevv/logistic_customer3:v.0.1``
        whose file output ``lr_metrics`` is read from ``/logistic/lr_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    produced_files = {'lr_metrics': '/logistic/lr_metrics'}

    return dsl.ContainerOp(
        name='Logistic Regression',
        image='mavendevv/logistic_customer3:v.0.1',
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [12]:
def rf_op(clean_data):
    """Build the 'Randomforest' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavencodev/rf_customer:v.0.1`` whose
        file output ``rf_metrics`` is read from ``/randomforest/rf_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    produced_files = {'rf_metrics': '/randomforest/rf_metrics'}

    return dsl.ContainerOp(
        name='Randomforest',
        image='mavencodev/rf_customer:v.0.1',
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [13]:
def gnb_op(clean_data):
    """Build the 'Gaussian NB' pipeline step.

    Args:
        clean_data: Value forwarded to the container after the
            ``--clean_data`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavendevv/gnb_customer2:v.0.1`` whose
        file output ``gnb_metrics`` is read from ``/gaussianNB/gnb_metrics``.
    """
    cli_args = ['--clean_data', clean_data]
    produced_files = {'gnb_metrics': '/gaussianNB/gnb_metrics'}

    return dsl.ContainerOp(
        name='Gaussian NB',
        image='mavendevv/gnb_customer2:v.0.1',
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [14]:
def eval_op(lr_metrics, rf_metrics, gnb_metrics):
    """Build the 'Model Evaluation' pipeline step.

    Args:
        lr_metrics: Value forwarded after the ``--lr_metrics`` flag.
        rf_metrics: Value forwarded after the ``--rf_metrics`` flag.
        gnb_metrics: Value forwarded after the ``--gnb_metrics`` flag.

    Returns:
        A ``dsl.ContainerOp`` running ``mavencodev/eval_customer:v.0.3`` whose
        file output ``best_model`` is read from ``/eval/best_model``.
    """
    # One flag/value pair per model's metrics, in the original order.
    cli_args = [
        '--lr_metrics', lr_metrics,
        '--rf_metrics', rf_metrics,
        '--gnb_metrics', gnb_metrics,
    ]
    produced_files = {'best_model': '/eval/best_model'}

    return dsl.ContainerOp(
        name='Model Evaluation',
        image='mavencodev/eval_customer:v.0.3',
        arguments=cli_args,
        file_outputs=produced_files,
    )

In [15]:
def push_op(bucket_name, credentials, best_model):
    """Build the 'Export Model to Cloud Storage' pipeline step.

    Args:
        bucket_name: Value forwarded after the ``--bucket_name`` flag.
        credentials: Value forwarded after the ``--credentials`` flag.
        best_model: Value forwarded after the ``--best_model`` flag.

    Returns:
        A ``dsl.ContainerOp`` running the ``mavencodev/push_customer`` image.
        No file outputs are declared for this step.
    """
    # NOTE(review): the image has no tag, unlike every other step in this
    # notebook, so the registry's default tag is pulled -- confirm whether a
    # pinned version was intended.
    container_image = 'mavencodev/push_customer'

    # NOTE(review): credentials travel as a plain command-line argument;
    # confirm this is acceptable for the target cluster.
    cli_args = [
        '--bucket_name', bucket_name,
        '--credentials', credentials,
        '--best_model', best_model,
    ]

    return dsl.ContainerOp(
        name='Export Model to Cloud Storage',
        image=container_image,
        arguments=cli_args,
    )

In [16]:
@dsl.pipeline(
    name='customer propensity Prediction',
    description='An ML reusable pipeline that predicts the chances of a customer to purchase a product'
)
def customer_pipeline(bucket_name, credentials):
    """Wire the individual steps into the customer propensity pipeline.

    Step order: load data -> statistics -> schema -> validation ->
    preprocessing -> three model steps (logistic regression, random forest,
    Gaussian NB) -> evaluation -> export.

    Args:
        bucket_name: Pipeline parameter handed to ``push_op``.
        credentials: Pipeline parameter handed to ``push_op``.
    """

    def output_path(step, output_name):
        # Wrap a step's named output so it is passed on as an artifact path.
        return dsl.InputArgumentPath(step.outputs[output_name])

    load_step = load_data_op()

    stats_step = stat_op(output_path(load_step, 'data'))
    stats_step.after(load_step)

    schema_step = schema_op(output_path(stats_step, 'stats'))
    schema_step.after(stats_step)

    validation_step = val_op(
        output_path(stats_step, 'stats'),
        output_path(schema_step, 'schema'),
    )
    validation_step.after(stats_step, schema_step)

    # Preprocessing consumes the raw data, but is also explicitly ordered
    # after the validation step.
    preprocess_step = preprocess_op(output_path(load_step, 'data'))
    preprocess_step.after(load_step, validation_step)

    # The three model steps each depend only on the preprocessing step.
    lr_step = lr_op(output_path(preprocess_step, 'clean_data'))
    lr_step.after(preprocess_step)

    rf_step = rf_op(output_path(preprocess_step, 'clean_data'))
    rf_step.after(preprocess_step)

    gnb_step = gnb_op(output_path(preprocess_step, 'clean_data'))
    gnb_step.after(preprocess_step)

    evaluation_step = eval_op(
        output_path(lr_step, 'lr_metrics'),
        output_path(rf_step, 'rf_metrics'),
        output_path(gnb_step, 'gnb_metrics'),
    )
    evaluation_step.after(lr_step, rf_step, gnb_step)

    push_op(
        bucket_name,
        credentials,
        output_path(evaluation_step, 'best_model'),
    ).after(evaluation_step)

In [17]:
# Compile the pipeline function into a workflow definition written to
# '<experiment_name>.yaml' in the current working directory.
experiment_name = 'customer_pipeline'

kfp.compiler.Compiler().compile(
    pipeline_func=customer_pipeline,
    package_path=f'{experiment_name}.yaml',
)



In [18]:
# Create a Kubeflow Pipelines client (no explicit host given) and submit a
# run built directly from the pipeline function.
# NOTE(review): customer_pipeline declares `bucket_name` and `credentials`,
# but no values are supplied here -- confirm the export step can work
# without them.
client = kfp.Client()
client.create_run_from_pipeline_func(
    pipeline_func=customer_pipeline,
    arguments={},
)

RunPipelineResult(run_id=08d0c1e8-118f-4322-b760-b06ef7e5549c)