# Kubeflow pipeline

## Set up the environment

In [None]:
pip freeze | grep kfp || pip install kfp

In [1]:
from os import path

import kfp
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.notebook

## Kubeflow cluster parameters

In [2]:
# Hostname of the Kubeflow Pipelines endpoint; passed to kfp.Client below.
HOST = '5b8b92b6f76cc052-dot-us-central2.pipelines.googleusercontent.com'

# NOTE(review): BUCKET is not referenced by any later cell in this notebook;
# the same value appears in the image tag produced by the build script, so it
# may actually be the GCP project id rather than a bucket name - confirm.
BUCKET = 'ds-dev-playground'

In [3]:
# Create the Kubeflow Pipelines API client for the endpoint configured in HOST.
client = kfp.Client(host=HOST)

In [4]:
# Create the 'exoplanets' experiment and keep a handle to it; its id is used
# later when submitting the pipeline run.
exp = client.create_experiment(name='exoplanets')
# Last expression of the cell: displays the experiments known to the cluster.
client.list_experiments()

{'experiments': [{'created_at': datetime.datetime(2021, 2, 21, 18, 1, 12, tzinfo=tzlocal()),
                  'description': 'All runs created without specifying an '
                                 'experiment will be grouped here.',
                  'id': '2040adb0-db22-40be-a536-e83aa602c131',
                  'name': 'Default',
                  'resource_references': None,
                  'storage_state': 'STORAGESTATE_AVAILABLE'},
                 {'created_at': datetime.datetime(2021, 2, 21, 18, 8, 18, tzinfo=tzlocal()),
                  'description': None,
                  'id': '9073c51b-bd67-4a96-a1bf-d3a5a9907c1c',
                  'name': 'exoplanets',
                  'resource_references': None,
                  'storage_state': 'STORAGESTATE_AVAILABLE'}],
 'next_page_token': None,
 'total_size': 2}

## Build and push the images to the GCP Container Registry (gcr.io)

In [5]:
# Build the exoplanets preprocess container image via the component's build script.
!components/preprocess/scripts/build.sh

Sending build context to Docker daemon  27.14kB
Step 1/4 : FROM python:3.7-slim
 ---> fabb1ac41e06
Step 2/4 : COPY . /code
 ---> Using cache
 ---> bda0019f2790
Step 3/4 : WORKDIR /code
 ---> Using cache
 ---> 456f3c81f791
Step 4/4 : RUN pip install -U numpy xgboost
 ---> Using cache
 ---> 259b5df18aa2
Successfully built 259b5df18aa2
Successfully tagged gcr.io/ds-dev-playground/exoplanets_kubeflow-preprocess:latest


In [6]:
# Push the exoplanets preprocess container image to gcr.io.
!components/preprocess/scripts/push.sh

The push refers to repository [gcr.io/ds-dev-playground/exoplanets_kubeflow-preprocess]

[1B62061195: Preparing 
[1B60accd3c: Preparing 
[1B9a903741: Preparing 
[1Bf1c57fd6: Preparing 
[1Bc0e8385f: Preparing 
[1Baf342595: Preparing 
[2Baf342595: Layer already exists [4A[2K[1A[2Klatest: digest: sha256:c6fc07dbc71d5caf92f74fca36b8ef520cd71ab1334adc714f0bd13dd8e041f1 size: 1791


## Create a Kubeflow pipeline


In [7]:
# Output archive written by the compiler and later submitted as the run package.
PIPELINE_TAR = "exoplanets.tar.gz"

# Component spec loaded inside the pipeline function to build the preprocess step.
PREPROCESS_YAML = "components/preprocess/preprocess.yaml"

In [8]:
@dsl.pipeline(
    name='ExoPlanets_kubeflow',
    description='ExoPlanets - Kubeflow end-to-end pipeline')
def pipeline(gcs_bucket_name='<bucket where data and model will be exported>'):
    """Define the ExoPlanets pipeline; it currently contains one preprocess step.

    Args:
        gcs_bucket_name: Pipeline parameter forwarded to the preprocess
            component as its ``input_bucket`` input. The default is only a
            placeholder; a real value is supplied when the run is submitted.
    """
    # Build the step factory from the component spec file.
    preprocess_op = comp.load_component_from_file(PREPROCESS_YAML)

    # Instantiating the op inside the pipeline function is what adds the step
    # to the pipeline; the returned task is not needed because no other step
    # consumes its outputs.
    preprocess_op(
        input_bucket=gcs_bucket_name,
    )

In [9]:
# Compile the pipeline function into the package archive named by PIPELINE_TAR.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path=PIPELINE_TAR,
)

In [10]:
# Confirm the compiled pipeline package exists on disk.
ls $PIPELINE_TAR

exoplanets.tar.gz


## Run the Kubeflow pipeline

In [11]:
# Submit the compiled package as a run inside the 'exoplanets' experiment.
# NOTE(review): the bucket name is hard-coded here while the BUCKET constant
# defined earlier in this notebook is never used - confirm which is intended.
run = client.run_pipeline(
    experiment_id=exp.id,
    job_name="exoplanets_preprocess_v5",
    pipeline_package_path=PIPELINE_TAR,
    params={"gcs_bucket_name": "exoplanets_kubeflow"},
)