## Building Docker Artifacts

In [25]:
cd pipeline_components

/home/melnamoury/Desktop/kubeflow-sdk-demo/pipeline_components


In [27]:
!docker login -u username -p password

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [28]:
%%writefile config.py
Bucket_uri="Bucket URI"
version=1
store_artifacts=Bucket_uri + "/" + str(version)
data_path=Bucket_uri + "/" + "data/train.csv"
processed_data=Bucket_uri + "/" + "processed/data_processed.csv"

Writing config.py


In [46]:
%%writefile Dockerfile
# Image that packages the current directory together with its Python dependencies.
FROM gcr.io/deeplearning-platform-release/tf-cpu
ARG DEBIAN_FRONTEND=noninteractive
# Copy the build context into /pipeline and install the Python requirements from it.
WORKDIR /pipeline
COPY ./ ./
RUN pip install -r requirements.txt
RUN pip install "dask[dataframe]" --upgrade
# Silence TensorFlow's C++ logging. Written in key=value form because the
# space-separated `ENV key value` syntax is a legacy format.
ENV TF_CPP_MIN_LOG_LEVEL=3

Overwriting Dockerfile


## Build Docker Image

In [47]:
# Build the image from the Dockerfile in the current directory and tag it.
# NOTE(review): the component definitions later in this notebook reference
# docker.io/ankitaj86/kubeflow-sdk-farmer rather than this tag — confirm which repository is intended.
!docker build -t melnamoury/kubeflow-sdk-farmer .

Sending build context to Docker daemon  4.657MB
Step 1/7 : FROM gcr.io/deeplearning-platform-release/tf-cpu
 ---> 20d85d5ff60a
Step 2/7 : ARG DEBIAN_FRONTEND=noninteractive
 ---> Using cache
 ---> a8595af4abb3
Step 3/7 : WORKDIR /pipeline
 ---> Running in 811678c3b1a2
Removing intermediate container 811678c3b1a2
 ---> 89afbec689bd
Step 4/7 : COPY ./ ./
 ---> 496e65e8bfd8
Step 5/7 : RUN pip install -r requirements.txt
 ---> Running in 5edfa81b3820
Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting xgboost
  Downloading xgboost-1.6.1-py3-none-manylinux2014_x86_64.whl (192.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 192.9/192.9 MB 2.0 MB/s eta 0:00:00
Collecting dask[dataframe]
  Downloading dask-2022.2.0-py3-none-any.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 1.5 MB/s eta 0:00:00
Collecting google-cloud
  Downloading google_cloud-0.3

## Push Docker Image to Docker Hub

In [48]:
# Upload the tagged image to its registry repository (requires a prior `docker login`).
!docker push melnamoury/kubeflow-sdk-farmer

The push refers to repository [docker.io/melnamoury/kubeflow-sdk-farmer]

[1Ba508d677: Preparing 
[1Beee8f0b9: Preparing 
[1B9fde6097: Preparing 
[1B81ed4975: Preparing 
[1Bdaf002b4: Preparing 
[1B99a0501a: Preparing 
[1Bd66050d7: Preparing 
[1Bbcab0add: Preparing 
[1Ba316c5c6: Preparing 
[1B198535a8: Preparing 
[1B41727f83: Preparing 
[1B5878c855: Preparing 
[1B78b45d85: Preparing 
[5B198535a8: Waiting g 
[1B59494948: Preparing 
[4B78b45d85: Waiting g 
[6B5878c855: Waiting g 
[1B52904e20: Preparing 
[6B5965a208: Waiting g 
[1B8c5c8a78: Preparing 
[7B59494948: Waiting g 
[1B5afcf21f: Preparing 
[17B66050d7: Pushing  78.97MB/3.222GB[23A[2K[20A[2K[21A[2K[19A[2K[21A[2K[21A[2K[22A[2K[19A[2K[19A[2K[19A[2K[19A[2K[22A[2K[21A[2K[19A[2K[21A[2K[22A[2K[20A[2K[23A[2K[21A[2K[19A[2K[22A[2K[21A[2K[22A[2K[21A[2K[22A[2K[21A[2K[17A[2K[18A[2K[22A[2K[17A[2K[21A[2K[18A[2K[22A[2K[21A[2K[17A[2K[18A[2K[21A[2K[18A

## Install Kubeflow SDK

In [36]:
!pip install kfp



# Building Kubeflow Pipeline

In [37]:
import kfp
import kfp.components as comp
from kubernetes.client.models import V1EnvVar

## Define Kubeflow Pipeline Components

In [38]:
@kfp.dsl.component
def get_data():
    # Data-preparation step: a container that runs get_data.py.
    # NOTE(review): the build/push cells tag the image as melnamoury/kubeflow-sdk-farmer,
    # while this step pulls from the ankitaj86 repository — confirm which one is intended.
    step_image = 'docker.io/ankitaj86/kubeflow-sdk-farmer'
    entrypoint = ['python', 'get_data.py']
    return kfp.dsl.ContainerOp(
        name='Data-Preparation',
        image=step_image,
        command=entrypoint,
    )

In [39]:
@kfp.dsl.component
def data_preprocessing():
    # Pre-processing step: a container that runs process_data.py.
    step_image = 'docker.io/ankitaj86/kubeflow-sdk-farmer'
    entrypoint = ['python', 'process_data.py']
    return kfp.dsl.ContainerOp(
        name='data-preprocessing',
        image=step_image,
        command=entrypoint,
    )

In [40]:
@kfp.dsl.component
def training():
    # Training step: a container that runs train.py.
    # file_outputs maps each output name to the path of a file inside the container.
    output_files = {
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
        'mlpipeline-metrics': '/mlpipeline-metrics.json',
    }
    return kfp.dsl.ContainerOp(
        name='training',
        image='docker.io/ankitaj86/kubeflow-sdk-farmer',
        command=['python', 'train.py'],
        file_outputs=output_files,
    )

In [41]:
# Inspect the training component: instantiate it and print the resulting configuration.
# Set debug to False to skip this check.
debug = True
if debug :
    training_component_vis = training()
    print(training_component_vis)

{'ContainerOp': {'is_exit_handler': False, 'human_name': 'training', 'display_name': None, 'name': 'training 800007f7074a6f5b', 'node_selector': {}, 'volumes': [], 'tolerations': [], 'affinity': {}, 'pod_annotations': {}, 'pod_labels': {}, 'num_retries': 0, 'retry_policy': None, 'backoff_factor': None, 'backoff_duration': None, 'backoff_max_duration': None, 'timeout': 0, 'init_containers': [], 'sidecars': [], 'loop_args': None, '_component_spec_inputs_with_pipeline_params': [], '_inputs': [], 'dependent_names': [], 'enable_caching': True, 'attrs_with_pipelineparams': ['node_selector', 'volumes', 'pod_annotations', 'pod_labels', 'num_retries', 'init_containers', 'sidecars', 'tolerations', '_container', 'artifact_arguments', '_parameter_arguments'], '_is_v2': False, '_container': {'args': None,
 'command': ['python', 'train.py'],
 'env': None,
 'env_from': None,
 'image': 'docker.io/ankitaj86/kubeflow-sdk-farmer',
 'image_pull_policy': None,
 'lifecycle': None,
 'liveness_probe': None,
 



## Final Kubeflow pipeline Definition

In [43]:
@kfp.dsl.pipeline(
  name="Modeling simple sentiment Analysis pipeline",
  description="First intilaized pipeline to run over Kubeflow"
)
def kfp_pipline():
    # Chain the three steps so each one starts only after the previous one finishes:
    # get_data -> data_preprocessing -> training.
    fetch_step = get_data()
    preprocess_step = data_preprocessing().after(fetch_step)
    train_step = training().after(preprocess_step)

    # A max staleness of "P0D" is set on every step so cached results are not reused.
    for step in (fetch_step, preprocess_step, train_step):
        step.execution_options.caching_strategy.max_cache_staleness = "P0D"


In [None]:
# Inspect the pipeline: call the pipeline function and print what it returns.
# NOTE(review): kfp_pipline has no return statement, so this presumably prints None — confirm
# whether this cell is still useful.
debug = True
if debug :
    training_pipeline_output = kfp_pipline()
    print(training_pipeline_output)

## Compile Kubeflow Pipeline
* This generates a .zip file containing a YAML file that holds the configuration of the Kubeflow pipeline.

In [None]:
# Compile the pipeline function into a package written to farmer-kubeflow-sdk-demo.zip.
kfp.compiler.Compiler().compile(kfp_pipline, 'farmer-kubeflow-sdk-demo.zip')

## Connect to the Deployed Kubeflow Pipelines Endpoint (GCP)

In [None]:
# Create the KFP client used by the following cells to create experiments and submit runs.
# Note: the endpoint argument is only needed when this client is not running on the same
# cluster as Kubeflow Pipelines; replace it with the endpoint of your own deployment.
client = kfp.Client("57f227181c3ac171-dot-us-central1.pipelines.googleusercontent.com")

### Create Experiment

In [None]:
# Create the experiment under which the pipeline run will be submitted.
EXPERIMENT_NAME = 'First-Pipeline-1'
experiment = client.create_experiment(name=EXPERIMENT_NAME)

### Deploy the Pipeline to the Kubeflow Pipelines Endpoint

In [None]:
# Submit a run of the compiled pipeline package to the experiment.
# The package path must be the file produced by Compiler().compile() earlier in this notebook
# ('farmer-kubeflow-sdk-demo.zip'); the previous 'Sentiment-…' name is not generated anywhere here.
run = client.run_pipeline(experiment.id, 'First-run-3', 'farmer-kubeflow-sdk-demo.zip')