# In [8]:

import kfp.dsl as dsl
import yaml
from kubernetes import client as k8s


@dsl.pipeline(
    name='Image Caption',
    description='A pipeline demonstrating reproducible steps for image caption'
)
def image_caption_pipeline(
        size_of_pvc="1Gi",
        zip_url="http://140.114.79.84/ms-coco.zip",
        dataset_path="/mnt/ms-coco",
        images_path="/mnt/ms-coco/train2014/train2014",
        annotation_path="/mnt/ms-coco/annotations_trainval2014/annotations/captions_train2014.json",
        preprocessing_batch_size=8,
        num_examples=30000,
        epochs=20,
        training_batch_size=16,
        hidden_state_size=512,
        vocab_size=5000,
        embedding_dim=256,
        max_length=50
    ):
    """Define the image-caption pipeline.

    The pipeline creates one volume, then chains five container steps
    (download, preprocess, tokenize, train, predict) by handing each step the
    previous step's ``pvolume`` mounted at ``/mnt``. A Seldon resource loaded
    from ``./seldon_production_pipeline.yaml`` is applied after the
    prediction step.

    Args:
        size_of_pvc: Size requested for the volume created by ``VolumeOp``.
        zip_url: Passed to the downloader step as ``--zip-url``.
        dataset_path: Passed as ``--dataset-path`` to the preprocess,
            tokenize, train and predict steps.
        images_path: Passed to the preprocess step as ``--images-path``.
        annotation_path: Passed to the preprocess step as
            ``--annotation-path``.
        preprocessing_batch_size: Passed to the preprocess step as
            ``--batch-size``.
        num_examples: Passed to the preprocess step as ``--num-examples``.
        epochs: Passed to the training step as ``--epochs``.
        training_batch_size: Passed to the training step as ``--batch-size``.
        hidden_state_size: Passed to the train and predict steps as
            ``--units``.
        vocab_size: Passed to the tokenizer step as ``--top-k``.
        embedding_dim: Passed to the train and predict steps as
            ``--embedding-dim``.
        max_length: Passed to the predict step as ``--max-length``.
    """
    script = "/microservice/pipeline_step.py"

    # Both the training and the prediction step declare the same artifacts.
    ui_artifact_paths = {
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
        'mlpipeline-metrics': '/mlpipeline-metrics.json',
    }

    vop = dsl.VolumeOp(
        name='my-pvc',
        resource_name="my-pvc",
        modes=["ReadWriteMany"],
        size=size_of_pvc
    )

    download_step = dsl.ContainerOp(
        name='data_downloader',
        image='chaowen/img_caption_data_downloader:latest',
        command="python",
        arguments=[
            script,
            "--zip-url", zip_url
        ],
        pvolumes={"/mnt": vop.volume}
    )

    # NOTE: the step name keeps its original spelling because it is part of
    # the compiled workflow.
    preprocess_step = dsl.ContainerOp(
        name='preprocesing',
        image='chaowen/img_caption_preprocess:latest',
        command="python",
        arguments=[
            script,
            "--dataset-path", dataset_path,
            "--images-path", images_path,
            "--annotation-path", annotation_path,
            "--num-examples", num_examples,
            "--batch-size", preprocessing_batch_size
        ],
        # Reusing the previous step's pvolume is what orders the steps.
        pvolumes={"/mnt": download_step.pvolume}
    )

    tokenize_step = dsl.ContainerOp(
        name='tokenizer',
        image='chaowen/img_caption_tokenize:latest',
        command="python",
        arguments=[
            script,
            "--dataset-path", dataset_path,
            "--top-k", vocab_size
        ],
        pvolumes={"/mnt": preprocess_step.pvolume}
    )

    # NOTE(review): 'tensorboard_dir' points at a directory rather than a
    # file -- confirm that this is what file_outputs is expected to receive.
    train_step = dsl.ContainerOp(
        name='training',
        image='chaowen/img_caption_train:latest',
        command="python",
        arguments=[
            script,
            "--dataset-path", dataset_path,
            "--epochs", epochs,
            "--batch-size", training_batch_size,
            "--embedding-dim", embedding_dim,
            "--units", hidden_state_size
        ],
        output_artifact_paths=dict(ui_artifact_paths),
        file_outputs={'tensorboard_dir': '/mnt/ms-coco/train/logs/'},
        pvolumes={"/mnt": tokenize_step.pvolume}
    )

    predict_step = dsl.ContainerOp(
        name='prediction',
        image='chaowen/img_caption_predict:latest',
        command="python",
        arguments=[
            script,
            "--dataset-path", dataset_path,
            "--units", hidden_state_size,
            "--embedding-dim", embedding_dim,
            "--max-length", max_length
        ],
        output_artifact_paths=dict(ui_artifact_paths),
        file_outputs={'tensorboard_dir': '/mnt/ms-coco/valid/logs/'},
        pvolumes={"/mnt": train_step.pvolume}
    )

    # safe_load works on every PyYAML release (yaml.load without a Loader
    # raises TypeError on PyYAML >= 6), and the context manager closes the
    # file handle.
    with open("./seldon_production_pipeline.yaml") as config_file:
        seldon_config = yaml.safe_load(config_file)

    deploy_step = dsl.ResourceOp(
        name="seldondeploy",
        k8s_resource=seldon_config,
        attribute_outputs={"name": "{.metadata.name}"})

    # The resource op shares no volume with the chain, so order it explicitly.
    deploy_step.after(predict_step)

    # Every container step uses a ':latest' tag, so set the pull policy to
    # "Always" on each of them.
    for step in (download_step, preprocess_step, tokenize_step,
                 train_step, predict_step):
        step.container.image_pull_policy = "Always"

if __name__ == '__main__':
    # Compile the pipeline function into the archive
    # 'image_caption_volume.tar.gz' when this file is run as a script.
    from kfp.compiler import Compiler

    pipeline_compiler = Compiler()
    pipeline_compiler.compile(
        image_caption_pipeline,
        'image_caption_volume.tar.gz',
    )


