# Create Kubeflow Pipeline



In [None]:
import kfp

# Connect to the Kubeflow Pipelines API; replace the placeholder host with your own endpoint URL.
client = kfp.Client(host='<TO DEFINE> ex: https://5ab1dd08e55a522c-dot-europe-west1.pipelines.googleusercontent.com')
import kfp.components as comp
from kfp.components import create_component_from_func
import kfp.dsl as dsl

In [None]:
%%bash
#!/bin/bash -e
# Under the %%bash cell magic the shebang above is only a comment, so its -e flag
# is never applied; enable fail-fast behaviour explicitly instead.
set -e

project_id='par-devoxx-sfeir'
zone='europe-west1-c'
cluster='<TO DEFINE> ex: cluster-1 '

# Fetch the cluster credentials for the chosen cluster.
# Expansions are quoted so a value containing spaces stays a single argument.
gcloud container clusters get-credentials "$cluster" --zone "$zone" --project "$project_id"

## Define kubeflow components
### 1- Docker image components 

In [None]:
# Read the YAML definition of the TFRecord conversion component.
# The encoding is explicit so the result does not depend on the platform's default locale.
with open("./components/image_tfrecord.yaml", "r", encoding="utf-8") as f:
    image_tfrecord_components = f.read()
# Show the definition in the notebook output for inspection.
print(image_tfrecord_components)

In [None]:
# Build a pipeline-step factory from the YAML component text read from ./components/image_tfrecord.yaml.
create_step_convert_tfrecords = comp.load_component_from_text(image_tfrecord_components)

### 2- Python based components

In [None]:
def preprocess_flag(preprocess: bool) -> bool:
    """
    Echo the preprocessing flag to stdout and hand it back unchanged.

    :param preprocess: (bool) flag to report
    :return: the value that was passed in
    """
    label = "Preprocess "
    print(label, preprocess)
    return preprocess


# Wrap the Python function above as a pipeline component.
print_preprocess = comp.func_to_container_op(preprocess_flag)

In [None]:
def createTraningJob(training_data: str,
                     validation_data: str,
                     project: str,
                     location: str,
                     bucket: str,
                     batch_size: str = '50',
                     validation_batch_size: str = '20',
                     training_ds_size: str = '25000',
                     validation_ds_size: str = '5000',
                     img_height: str = '64',
                     img_width: str = '64',
                     nb_classes: str = '5',
                     display_name: str = 'quickdraw_training'
                     ) -> str:
    """
    Create a Vertex AI custom Python-package training job and run it.

    The dataset locations and the model output path are handed to the training
    package through environment variables; the remaining settings are passed as
    command-line flags, which is why they are typed as strings.

    :param training_data: (str) GCS path to the training dataset TFRecords,
    :param validation_data: (str) GCS path to the validation dataset TFRecords,
    :param project: (str) name of the Google Cloud project,
    :param location: (str) training location in Vertex (europe-west1, ...),
    :param bucket: (str) GCS bucket used to store data during the training,
        with or without a trailing slash,
    :param batch_size: (str) training batch size, default '50',
    :param validation_batch_size: (str) validation batch size, default '20',
    :param training_ds_size: (str) training dataset size, default '25000',
    :param validation_ds_size: (str) validation dataset size, default '5000',
    :param img_height: (str) image height, default '64',
    :param img_width: (str) image width, default '64',
    :param nb_classes: (str) number of classes, default '5',
    :param display_name: (str) Vertex job display name, default 'quickdraw_training'
    :return: (str) the ``name`` attribute of the model object returned by the training run
    """

    # Kept as a function-local import, as in the original; presumably required so the
    # component source is self-contained once packaged -- confirm before hoisting.
    import google.cloud.aiplatform as aip

    # Accept the bucket with or without a trailing slash so that plain string
    # concatenation below cannot produce a path inside a different bucket name.
    bucket_root = bucket.rstrip('/') + '/'
    staging_bucket = bucket_root + "staging/" + display_name
    model_path = bucket_root + "gcs_model_data/" + display_name

    env_var = {'GCS_TRAINING_DATA': training_data,
               'GCS_VALIDATION_DATA': validation_data,
               'GCS_MODEL_DATA_PATH': model_path
               }

    job = aip.CustomPythonPackageTrainingJob(
        display_name=display_name,
        # Placeholder: must point at the uploaded training package before use.
        python_package_gcs_uri='gs://<TO DEFINE>/vertex_job_code/quickdraw_classifier-0.0.1.tar.gz',
        python_module_name="quickdraw_classifier.training",
        container_uri='europe-docker.pkg.dev/vertex-ai/training/tf-gpu.2-8:latest',
        model_serving_container_image_uri='europe-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest',
        project=project,
        location=location,
        # Use the per-job staging folder computed above rather than the bucket root.
        staging_bucket=staging_bucket
    )

    CMDARGS = [
        '--batch_size=' + batch_size,
        '--validation_batch_size=' + validation_batch_size,
        '--training_ds_size=' + training_ds_size,
        '--validation_ds_size=' + validation_ds_size,
        '--img_height=' + img_height,
        '--img_width=' + img_width,
        '--nb_classes=' + nb_classes
    ]

    print(CMDARGS)

    # sync=True: the call is expected to return only once the job is done, so the
    # resulting model can be returned to the caller.
    model = job.run(
        args=CMDARGS,
        environment_variables=env_var,
        sync=True,
        replica_count=1,
        machine_type='n1-standard-8',
        accelerator_type='NVIDIA_TESLA_K80',
        accelerator_count=1,
        base_output_dir=model_path)

    return model.name


In [None]:
# Turn the training function into a pipeline component running on the given base image,
# and also write its definition to ./components/train_model.yaml.
train_model = create_component_from_func(
    createTraningJob, output_component_file='./components/train_model.yaml',
    base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-8')


## Assemble components for the first Pipeline definition
### Define your pipeline parameters

In [None]:
# Prefix used to build the job, pipeline and experiment names below.
username = "<TO DEFINE>"
display_name = f"{username}_quickdraw_training"
pipeline_name = f"{username}_quickdraw_pipeline"
experiment_name = f"{username}_quickdraw"

### Define your pipeline

In [None]:

# Pipeline: report the preprocess flag, conditionally convert the training and
# validation images to TFRecords, then run the training component.
# `deploy_model` is accepted as a parameter but not used by this pipeline.
@dsl.pipeline(
    name='Quickdraw classifier ',
    description='A trainer that does end-to-end distributed training for Quickdraw classifier.'
)
def kubeflow_pipeline(
        images_path: str,
        tfrecords_path: str,
        image_validation_path: str,
        tfrecords_validation_path: str,
        bucket: str,
        location: str,
        project: str,
        training_data: str,
        validation_data: str,
        batch_size: str = '50',
        validation_batch_size: str = '20',
        training_ds_size: str = '25000',
        validation_ds_size: str = '5000',
        image_size: int = 64,
        img_height: str = '64',
        img_width: str = '64',
        nb_classes: str = '5',
        display_name: str = 'quickdraw_training',
        preprocess: bool = False,
        deploy_model: bool = False

):
    flag_step = print_preprocess(preprocess)

    # `== True` is deliberate: the comparison builds the condition handed to
    # dsl.Condition, so it must not be rewritten as `is True` or a bare truth test.
    with dsl.Condition(flag_step.output == True, name="do-preprocessing"):
        convert_train = create_step_convert_tfrecords(
            images_path=images_path,
            tfrecords_path=tfrecords_path,
            target_size=image_size
        )
        convert_train.set_display_name('convert_training_data')
        convert_train.after(flag_step)

        convert_valid = create_step_convert_tfrecords(
            images_path=image_validation_path,
            tfrecords_path=tfrecords_validation_path,
            target_size=image_size
        )
        convert_valid.set_display_name('convert_validation_data')
        convert_valid.after(flag_step)

    # Arguments forwarded unchanged to the training component.
    training_kwargs = dict(
        training_data=training_data,
        validation_data=validation_data,
        project=project,
        location=location,
        bucket=bucket,
        batch_size=batch_size,
        validation_batch_size=validation_batch_size,
        training_ds_size=training_ds_size,
        validation_ds_size=validation_ds_size,
        img_height=img_height,
        img_width=img_width,
        nb_classes=nb_classes,
        display_name=display_name,
    )
    train_model_step = train_model(**training_kwargs)
    # Training is ordered after both conversion steps.
    train_model_step.after(convert_valid, convert_train)
    train_model_step.set_display_name('Training_Model')



### Compile Pipeline

In [None]:
# Compile the pipeline function into the package file quickdraw_pipeline.yaml.
kfp.compiler.Compiler().compile(
    pipeline_func=kubeflow_pipeline,
    package_path='quickdraw_pipeline.yaml')

### Upload Pipeline to Kubeflow

In [None]:
# Upload the compiled package under `pipeline_name` and show the object returned by the API.
pipeline = client.upload_pipeline(
    pipeline_package_path="quickdraw_pipeline.yaml", pipeline_name=pipeline_name)
print(pipeline)

### Define arguments for the training 

In [None]:
# Source images (glob patterns) and destination folders for the converted TFRecords.
images_path = "gs://devoxx_quickdraw/raw_images/training_data/*/*.png"
image_validation_path = "gs://devoxx_quickdraw/raw_images/validation_data/*/*.png"
tfrecords_path = "gs://devoxx_quickdraw/tfrecord_data/training_data/"
tfrecords_validation_path = "gs://devoxx_quickdraw/tfrecord_data/validation_data/"

# Datasets handed to the training step, and the image size used for conversion.
training_data = 'gs://devoxx_quickdraw/tfrecord_data/training_data/'
validation_data = 'gs://devoxx_quickdraw/tfrecord_data/validation_data/'
image_size = 64

# Project settings; the bucket placeholder must be filled in before running.
project = "par-devoxx-sfeir"
location = "europe-west1"
bucket = "gs://<TO DEFINE>/"

# Parameter values for a pipeline run, keyed by pipeline parameter name.
args = dict(
    images_path=images_path,
    tfrecords_path=tfrecords_path,
    image_validation_path=image_validation_path,
    tfrecords_validation_path=tfrecords_validation_path,
    bucket=bucket,
    location=location,
    project=project,
    training_data=training_data,
    validation_data=validation_data,
    display_name=display_name,
    preprocess=True,
    deploy_model=True,
    image_size=image_size,
)

### Create an experiment.

In [None]:
# Reuse the experiment when it can be looked up, otherwise create it.
try:
    experiment = client.get_experiment(experiment_name=experiment_name)
except Exception:
    # `except Exception` rather than a bare `except`, so that KeyboardInterrupt and
    # SystemExit are not swallowed while any lookup failure still falls back to creation.
    print("Experiment does not already exist")
    experiment = client.create_experiment(name=experiment_name)

In [None]:
# Show the experiment object obtained in the previous cell.
print(experiment)

### Launch pipeline into the Experiment 

In [None]:
# Start a run of the uploaded pipeline inside the experiment, using `args` as parameter values.
job = client.run_pipeline(job_name=display_name, experiment_id=experiment.id, params=args, pipeline_id=pipeline.id)

## Assemble components for the second Pipeline definition
### Create Deployment task

In [None]:
def deploy_model(
        project: str,
        region: str,
        model_name: str

) -> str:
    """
    Deploy an existing Vertex AI model to an endpoint named ``<model_name>_endpoint``.

    The most recently created endpoint with that display name is reused; a new
    endpoint is created only when none is found.

    :param project: (str) Google Cloud project passed to ``aiplatform.init``,
    :param region: (str) location passed to ``aiplatform.init``,
    :param model_name: (str) identifier used to look the model up with ``aiplatform.Model``
    :return: (str) ``resource_name`` of the object returned by ``Model.deploy``
    """
    from google.cloud import aiplatform
    aiplatform.init(project=project, location=region)

    endpoint_name = model_name + "_endpoint"

    def create_endpoint():
        """Return the newest endpoint called ``endpoint_name``, creating one if none exists."""
        endpoints = aiplatform.Endpoint.list(
            filter='displayName="{}"'.format(endpoint_name),
            order_by='create_time desc',
            project=project,
            location=region,
        )
        if endpoints:
            return endpoints[0]  # most recently created
        return aiplatform.Endpoint.create(
            display_name=endpoint_name, project=project, location=region
        )

    # The helper must return the endpoint; otherwise `None` would be passed to
    # `deploy` below and the get-or-create lookup would have no effect.
    endpoint = create_endpoint()

    # The model is looked up by name/id rather than uploaded here.
    model = aiplatform.Model(model_name=model_name)

    model_deploy = model.deploy(
        machine_type="n1-standard-4",
        endpoint=endpoint,
        traffic_split={"0": 100},
        deployed_model_display_name=model_name,
    )

    # Returned so the value is available as the component's output.
    return model_deploy.resource_name

In [None]:
# Turn the deployment function into a pipeline component running on the given base image,
# and also write its definition to ./components/model_deployment.yaml.
deploy = create_component_from_func(
    deploy_model, output_component_file='./components/model_deployment.yaml',
    base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-8')


### Create pipeline 

In [None]:
# Full pipeline: report the preprocess flag, conditionally convert the images to
# TFRecords, train the model, then conditionally deploy it.
# Inside this function `deploy_model` is the boolean pipeline parameter; the
# deployment component itself is the module-level `deploy`.
@dsl.pipeline(
    name='Quickdraw classifier_full',
    description='A trainer that does end-to-end distributed training for Quickdraw classifier.'
)
def kubeflow_pipeline(
        images_path: str,
        tfrecords_path: str,
        image_validation_path: str,
        tfrecords_validation_path: str,
        bucket: str,
        location: str,
        project: str,
        training_data: str,
        validation_data: str,
        batch_size: str = '50',
        validation_batch_size: str = '20',
        training_ds_size: str = '25000',
        validation_ds_size: str = '5000',
        image_size: int = 64,
        img_height: str = '64',
        img_width: str = '64',
        nb_classes: str = '5',
        display_name: str = 'quickdraw_training',
        preprocess: bool = False,
        deploy_model: bool = False

):
    flag_step = print_preprocess(preprocess)

    # `== True` is deliberate: the comparison builds the condition handed to
    # dsl.Condition, so it must not be rewritten as `is True` or a bare truth test.
    with dsl.Condition(flag_step.output == True, name="do-preprocessing"):
        convert_train = create_step_convert_tfrecords(
            images_path=images_path,
            tfrecords_path=tfrecords_path,
            target_size=image_size
        )
        convert_train.set_display_name('convert_training_data')
        convert_train.after(flag_step)

        convert_valid = create_step_convert_tfrecords(
            images_path=image_validation_path,
            tfrecords_path=tfrecords_validation_path,
            target_size=image_size
        )
        convert_valid.set_display_name('convert_validation_data')
        convert_valid.after(flag_step)

    # Arguments forwarded unchanged to the training component.
    training_kwargs = dict(
        training_data=training_data,
        validation_data=validation_data,
        project=project,
        location=location,
        bucket=bucket,
        batch_size=batch_size,
        validation_batch_size=validation_batch_size,
        training_ds_size=training_ds_size,
        validation_ds_size=validation_ds_size,
        img_height=img_height,
        img_width=img_width,
        nb_classes=nb_classes,
        display_name=display_name,
    )
    train_model_step = train_model(**training_kwargs)
    # Training is ordered after both conversion steps.
    train_model_step.after(convert_valid, convert_train)
    train_model_step.set_display_name('Training_Model')

    with dsl.Condition(deploy_model == True, name="deploy-model"):
        # The training step's output is used as the model identifier to deploy.
        deploy_model_op = deploy(
            project=project,
            region=location,
            model_name=train_model_step.output
        )
        deploy_model_op.after(train_model_step)



In [None]:
# Compile the redefined pipeline function into the package file quickdraw_pipeline_full.yaml.
kfp.compiler.Compiler().compile(
    pipeline_func=kubeflow_pipeline,
    package_path='quickdraw_pipeline_full.yaml')

### Update the pipeline version

In [None]:
# Upload the full package as a new version of the previously uploaded pipeline
# (identified by `pipeline.id`) and show the object returned by the API.
pipeline_full = client.upload_pipeline_version(pipeline_package_path='quickdraw_pipeline_full.yaml',
                                               pipeline_version_name=pipeline_name + "_full",
                                               pipeline_id=pipeline.id, )
print(pipeline_full)

### Launch Pipeline

In [None]:
# `pipeline_full` comes from `upload_pipeline_version`, so its id designates a pipeline
# *version*: pass it as `version_id` and keep `pipeline_id` pointing at the parent pipeline.
client.run_pipeline(job_name=display_name, experiment_id=experiment.id, params=args,
                    pipeline_id=pipeline.id, version_id=pipeline_full.id)