In [1]:
import kfp, os
import kfp.dsl as dsl 
import kfp.compiler as compiler
import kubernetes.client.models as k8s
import namesgenerator
from utils import *

In [2]:
# Namespace for this session, taken from the NAMESPACE environment variable.
# The value is None when the variable is not set.
NAMESPACE = os.getenv("NAMESPACE")

In [3]:
# Namespace-specific base URLs: kubeflow_address is handed to kfp.Client and
# hydrosphere_address is passed to the pipeline as its "hydrosphere-address" parameter.
# NOTE(review): if NAMESPACE is None these render as "http://None...." — confirm the
# environment variable is set before running.
kubeflow_address = f"http://{NAMESPACE}.kubeflow.odsc.k8s.hydrosphere.io"
hydrosphere_address = f"http://{NAMESPACE}.serving.odsc.k8s.hydrosphere.io"

In [4]:
# Log in to the Docker registry; the %%bash build cells publish images with `docker push`.
!docker login

Authenticating with existing credentials...
Login Succeeded


# Download

At this stage we will obtain all training data for our pipeline.

Build & publish an image

In [5]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-download:latest --no-cache 01_download
docker push tidylobster/mnist-pipeline-download:latest

Sending build context to Docker daemon  73.55MB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./download.py /src/
 ---> a3841e6d6547
Step 3/4 : WORKDIR /src/
 ---> Running in ab76f97760ca
Removing intermediate container ab76f97760ca
 ---> f2dabc0d28be
Step 4/4 : ENTRYPOINT [ "python", "download.py" ]
 ---> Running in 38eca19a76f8
Removing intermediate container 38eca19a76f8
 ---> 1d6489f5993f
Successfully built 1d6489f5993f
Successfully tagged tidylobster/mnist-pipeline-download:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-download]
df09be6d9ed4: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
0fc100fdc7f9: Waiting

### Pipeline

Create Kubernetes PVC resource

In [6]:
# Reference to the PersistentVolumeClaim named "storage".
storage_pvc = k8s.V1PersistentVolumeClaimVolumeSource(
    claim_name="storage",
)

# Pod volume backed by that claim.
storage_volume = k8s.V1Volume(
    name="storage",
    persistent_volume_claim=storage_pvc,
)

# Mount for the volume; the path is a workflow-parameter placeholder
# (presumably resolved from the pipeline's mount_path parameter at run time — confirm).
storage_volume_mount = k8s.V1VolumeMount(
    name="storage",
    mount_path="{{workflow.parameters.mount-path}}",
)

Define container operation

In [7]:
# Registry account used as the image prefix by the *_op functions.
# The %%bash build/push cells hard-code the same account name; keep them in sync.
username = "tidylobster"

In [8]:
def download_op(**kwargs):
    """Create the "download" pipeline step.

    Keyword Args:
        mount_path: Value passed to the container as ``--mount-path``.

    Returns:
        The ``dsl.ContainerOp`` with the shared storage volume attached and a
        ``data_path`` output read from ``/data_path.txt``.
    """
    container_args = ["--mount-path", kwargs["mount_path"]]

    step = dsl.ContainerOp(
        name="download",
        image=f"{username}/mnist-pipeline-download:latest",
        arguments=container_args,
        file_outputs={"data_path": "/data_path.txt"},
    )

    # Attach the shared storage volume and its mount to the step.
    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    return step

In [9]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(mount_path="/storage"):
    # Single-step pipeline: only the download step is wired in.
    download_step = download_op(mount_path=mount_path)

In [10]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [11]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [11]:
# Create a Kubeflow Pipelines client for the endpoint at kubeflow_address
client = kfp.Client(kubeflow_address)

In [12]:
# Name of the experiment that the runs in this notebook are attached to.
# NOTE(review): "Showreal" is probably a typo for "Showreel"; changing it would make
# the lookup below target a different experiment, so it is left as is.
experiment_name='MNIST Showreal'

In [13]:
# Get the id of the experiment named experiment_name, creating the experiment
# when the lookup fails.
try:
    experiment_id = client.get_experiment(experiment_name=experiment_name).id
except Exception:
    # Narrowed from a bare ``except:`` so that KeyboardInterrupt / SystemExit
    # are no longer swallowed. Any other lookup failure still falls through to
    # creation, as before; a connection problem will resurface from this call.
    experiment_id = client.create_experiment(experiment_name).id

In [14]:
# Start a run of "pipeline.yaml" in the experiment under a randomly generated name.
# No parameter overrides are passed to run_pipeline here.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(experiment_id, run_name, "pipeline.yaml")

Starting a new run with the name amazing_leavitt


# Train

At this stage we will create a model & train it on the downloaded data.

Build and publish image

In [15]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-train:latest --no-cache 02_train/
docker push tidylobster/mnist-pipeline-train:latest

Sending build context to Docker daemon  57.28MB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./train.py /src/
 ---> 977d3beb5292
Step 3/4 : WORKDIR /src/
 ---> Running in 22df3fbcabe5
Removing intermediate container 22df3fbcabe5
 ---> b2e233791247
Step 4/4 : ENTRYPOINT [ "python", "train.py" ]
 ---> Running in 45c21c8a0e0f
Removing intermediate container 45c21c8a0e0f
 ---> 3e8beead0995
Successfully built 3e8beead0995
Successfully tagged tidylobster/mnist-pipeline-train:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-train]
e5a85da65259: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
886601877ba4: Waiting
0fc100fdc7f9: Waiting
68dda0c9a8cd: Waiting
f67191ae09b8: Waiting
b2fd8b4c3da

### Pipeline

Define container operation

In [16]:
def train_op(download, **kwargs):
    """Create the "train" pipeline step.

    Args:
        download: Upstream step whose ``data_path`` output is passed as ``--data-path``.

    Keyword Args:
        mount_path: Passed as ``--mount-path``.
        learning_rate: Passed as ``--learning-rate``.
        epochs: Passed as ``--epochs``.
        batch_size: Passed as ``--batch-size``.

    Returns:
        The ``dsl.ContainerOp`` with the shared storage volume attached and an
        ``accuracy`` output read from ``/accuracy.txt``.
    """
    container_args = [
        "--data-path", download.outputs["data_path"],
        "--mount-path", kwargs["mount_path"],
        "--learning-rate", kwargs["learning_rate"],
        "--epochs", kwargs["epochs"],
        "--batch-size", kwargs["batch_size"],
    ]

    step = dsl.ContainerOp(
        name="train",
        image=f"{username}/mnist-pipeline-train:latest",
        arguments=container_args,
        file_outputs={"accuracy": "/accuracy.txt"},
    )

    # Attach the shared storage volume and its mount to the step.
    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    return step

In [17]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
):
    # Two steps: download, then train.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    download_step = download_op(mount_path=mount_path)

    train_step = train_op(download_step, mount_path=mount_path, **hyperparams)
    train_step.after(download_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

In [18]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [19]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [34]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters explicitly. The keys are the dash-separated forms of
# the pipeline_definition argument names.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10"
    }
)

Starting a new run with the name romantic_albattani


# Release

At this stage we will release the trained model to the cluster

Build and publish image

In [20]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-release:latest --no-cache 03_release/
docker push tidylobster/mnist-pipeline-release:latest

Sending build context to Docker daemon  5.632kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 516cd80def8f
Step 3/4 : WORKDIR /src/
 ---> Running in 6ac0166071a4
Removing intermediate container 6ac0166071a4
 ---> 7ef9caae7756
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in 5eb4c50fb456
Removing intermediate container 5eb4c50fb456
 ---> f468359fbd5d
Successfully built f468359fbd5d
Successfully tagged tidylobster/mnist-pipeline-release:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-release]
09aed34a2dac: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
0fc100fdc7f9: Waiting
68dda

### Pipeline

Define container operation

In [21]:
def release_op(download, train, **kwargs):
    """Create the "release" pipeline step.

    Args:
        download: Upstream step whose ``data_path`` output is passed as ``--data-path``.
        train: Upstream step whose ``accuracy`` output is passed as ``--accuracy``.

    Keyword Args:
        mount_path: Passed as ``--mount-path``.
        model_name: Passed as ``--model-name``.
        hydrosphere_address: Passed as ``--hydrosphere-address``.
        learning_rate: Passed as ``--learning-rate``.
        epochs: Passed as ``--epochs``.
        batch_size: Passed as ``--batch-size``.

    Returns:
        The ``dsl.ContainerOp`` with the shared storage volume attached and a
        ``model-version`` output read from ``/model-version.txt``.
    """
    container_args = [
        "--data-path", download.outputs["data_path"],
        "--mount-path", kwargs["mount_path"],
        "--model-name", kwargs["model_name"],
        "--accuracy", train.outputs["accuracy"],
        "--hydrosphere-address", kwargs["hydrosphere_address"],
        "--learning-rate", kwargs["learning_rate"],
        "--epochs", kwargs["epochs"],
        "--batch-size", kwargs["batch_size"],
    ]

    step = dsl.ContainerOp(
        name="release",
        image=f"{username}/mnist-pipeline-release:latest",
        arguments=container_args,
        file_outputs={"model-version": "/model-version.txt"},
    )

    # Attach the shared storage volume and its mount to the step.
    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    return step

In [22]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address=""
):
    # Steps are chained in order: download -> train -> release.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    download_step = download_op(mount_path=mount_path)

    train_step = train_op(download_step, mount_path=mount_path, **hyperparams)
    train_step.after(download_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

    release_step = release_op(
        download_step,
        train_step,
        mount_path=mount_path,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
        **hyperparams
    )
    release_step.after(train_step)

In [23]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [24]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [28]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters and the Hydrosphere address computed earlier in the
# notebook. The keys are the dash-separated forms of the pipeline_definition
# argument names.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
    }
)

Starting a new run with the name tender_shaw


# Deploy to Stage

At this stage we are deploying the model to the stage application to run integration tests.

Build and publish image

In [25]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-deploy-to-stage:latest --no-cache 04_deploy-to-stage/
docker push tidylobster/mnist-pipeline-deploy-to-stage:latest

Sending build context to Docker daemon  4.096kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 04806b527d82
Step 3/4 : WORKDIR /src/
 ---> Running in 2631adbd23bc
Removing intermediate container 2631adbd23bc
 ---> 8eea1b517fc3
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in f6bb1bbb765d
Removing intermediate container f6bb1bbb765d
 ---> 01b09d21bd0f
Successfully built 01b09d21bd0f
Successfully tagged tidylobster/mnist-pipeline-deploy-to-stage:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-deploy-to-stage]
3c43921fdea3: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
0fc100fdc7f

### Pipeline

Define container operation

In [26]:
def deploy_to_stage_op(release, **kwargs):
    """Create the "deploy_to_stage" pipeline step.

    Args:
        release: Upstream step whose ``model-version`` output is passed as
            ``--model-version``.

    Keyword Args:
        hydrosphere_address: Passed as ``--hydrosphere-address``.
        model_name: Passed as ``--model-name``.

    Returns:
        The ``dsl.ContainerOp`` with a ``stage-app-name`` output read from
        ``/stage-app-name.txt``. No storage volume is attached to this step.
    """
    container_args = [
        "--model-version", release.outputs["model-version"],
        "--hydrosphere-address", kwargs["hydrosphere_address"],
        "--model-name", kwargs["model_name"],
    ]

    return dsl.ContainerOp(
        name="deploy_to_stage",
        image=f"{username}/mnist-pipeline-deploy-to-stage:latest",
        arguments=container_args,
        file_outputs={"stage-app-name": "/stage-app-name.txt"},
    )

In [27]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
):
    # Steps are chained in order: download -> train -> release -> deploy_to_stage.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    download_step = download_op(mount_path=mount_path)

    train_step = train_op(download_step, mount_path=mount_path, **hyperparams)
    train_step.after(download_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

    release_step = release_op(
        download_step,
        train_step,
        mount_path=mount_path,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
        **hyperparams
    )
    release_step.after(train_step)

    stage_step = deploy_to_stage_op(
        release_step,
        hydrosphere_address=hydrosphere_address,
        model_name=model_name,
    )
    stage_step.after(release_step)


In [28]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [29]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [41]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters and the Hydrosphere address computed earlier in the
# notebook. The keys are the dash-separated forms of the pipeline_definition
# argument names.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
    }
)

Starting a new run with the name awesome_austin


# Test

At this stage we are performing integration tests.

Build and publish image

In [30]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-test:latest --no-cache 05_test/
docker push tidylobster/mnist-pipeline-test:latest

Sending build context to Docker daemon  32.07MB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./test.py /src/
 ---> 26fb5e733c11
Step 3/4 : WORKDIR /src/
 ---> Running in 817bac0d7a6b
Removing intermediate container 817bac0d7a6b
 ---> 7492898ebf96
Step 4/4 : ENTRYPOINT [ "python", "test.py" ]
 ---> Running in 778df15f5b79
Removing intermediate container 778df15f5b79
 ---> 43fb53a3dd24
Successfully built 43fb53a3dd24
Successfully tagged tidylobster/mnist-pipeline-test:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-test]
1926742676af: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
0fc100fdc7f9: Waiting
68dda0c9a8cd: Waiting
f67191ae09b8: Waiting
b2fd8b4c3da7: Waiting
93ed1238773c: W

### Pipeline

Define container operation

In [31]:
def test_op(download, stage, **kwargs):
    """Create the "test" pipeline step.

    Args:
        download: Upstream step whose ``data_path`` output is passed as ``--data-path``.
        stage: Upstream step whose ``stage-app-name`` output is passed as
            ``--stage-app-name``.

    Keyword Args:
        mount_path: Passed as ``--mount-path``.
        hydrosphere_address: Passed as ``--hydrosphere-address``.
        acceptable_accuracy: Passed as ``--acceptable-accuracy``.
        model_name: Passed as ``--model-name``.

    Returns:
        The ``dsl.ContainerOp`` with the shared storage volume attached. The
        step declares no file outputs.
    """
    container_args = [
        "--stage-app-name", stage.outputs["stage-app-name"],
        "--data-path", download.outputs["data_path"],
        "--mount-path", kwargs["mount_path"],
        "--hydrosphere-address", kwargs["hydrosphere_address"],
        "--acceptable-accuracy", kwargs["acceptable_accuracy"],
        "--model-name", kwargs["model_name"],
    ]

    step = dsl.ContainerOp(
        name="test",
        image=f"{username}/mnist-pipeline-test:latest",
        arguments=container_args,
    )

    # Attach the shared storage volume and its mount to the step.
    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    return step

In [32]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    acceptable_accuracy="0.90",
):
    # Steps are chained in order:
    # download -> train -> release -> deploy_to_stage -> test.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    download_step = download_op(mount_path=mount_path)

    train_step = train_op(download_step, mount_path=mount_path, **hyperparams)
    train_step.after(download_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

    release_step = release_op(
        download_step,
        train_step,
        mount_path=mount_path,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
        **hyperparams
    )
    release_step.after(train_step)

    stage_step = deploy_to_stage_op(
        release_step,
        hydrosphere_address=hydrosphere_address,
        model_name=model_name,
    )
    stage_step.after(release_step)

    test_step = test_op(
        download_step,
        stage_step,
        mount_path=mount_path,
        hydrosphere_address=hydrosphere_address,
        acceptable_accuracy=acceptable_accuracy,
        model_name=model_name,
    )
    # The test step is configured with a retry count of 3.
    test_step.set_retry(3)
    test_step.after(stage_step)

In [33]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [34]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [47]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters, the Hydrosphere address and the accuracy threshold.
# The keys are the dash-separated forms of the pipeline_definition argument names.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "acceptable-accuracy": "0.90",
    }
)

Starting a new run with the name suspicious_jepsen


# Deploy to Production

Finally, we deploy the model to the production application.

Build and publish image

In [35]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-deploy-to-prod:latest --no-cache 06_deploy-to-prod/
docker push tidylobster/mnist-pipeline-deploy-to-prod:latest

Sending build context to Docker daemon  3.584kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 5b81cae3f71e
Step 3/4 : WORKDIR /src/
 ---> Running in 57ea748b77a2
Removing intermediate container 57ea748b77a2
 ---> 79bf11a3862f
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in 1b17f9ff44c4
Removing intermediate container 1b17f9ff44c4
 ---> 4de1f85b1b33
Successfully built 4de1f85b1b33
Successfully tagged tidylobster/mnist-pipeline-deploy-to-prod:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-deploy-to-prod]
5866111ec03b: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
0fc100fdc7f9: Waiting
b2fd8b4c3da7:

### Pipeline

Define container operation

In [36]:
def deploy_to_prod_op(release, **kwargs):
    """Create the "deploy_to_prod" pipeline step.

    Args:
        release: Upstream step whose ``model-version`` output is passed as
            ``--model-version``.

    Keyword Args:
        model_name: Passed as ``--model-name``.
        hydrosphere_address: Passed as ``--hydrosphere-address``.

    Returns:
        The ``dsl.ContainerOp``. The step declares no file outputs and has no
        storage volume attached.
    """
    container_args = [
        "--model-version", release.outputs["model-version"],
        "--model-name", kwargs["model_name"],
        "--hydrosphere-address", kwargs["hydrosphere_address"],
    ]

    return dsl.ContainerOp(
        name="deploy_to_prod",
        image=f"{username}/mnist-pipeline-deploy-to-prod:latest",
        arguments=container_args,
    )

In [37]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    acceptable_accuracy="0.90",
):
    # Steps are chained in order:
    # download -> train -> release -> deploy_to_stage -> test -> deploy_to_prod.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    download_step = download_op(mount_path=mount_path)

    train_step = train_op(download_step, mount_path=mount_path, **hyperparams)
    train_step.after(download_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

    release_step = release_op(
        download_step,
        train_step,
        mount_path=mount_path,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
        **hyperparams
    )
    release_step.after(train_step)

    stage_step = deploy_to_stage_op(
        release_step,
        hydrosphere_address=hydrosphere_address,
        model_name=model_name,
    )
    stage_step.after(release_step)

    test_step = test_op(
        download_step,
        stage_step,
        mount_path=mount_path,
        hydrosphere_address=hydrosphere_address,
        acceptable_accuracy=acceptable_accuracy,
        model_name=model_name,
    )
    # The test step is configured with a retry count of 3.
    test_step.set_retry(3)
    test_step.after(stage_step)

    prod_step = deploy_to_prod_op(
        release_step,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
    )
    # Production deployment consumes the release output but is ordered after the test step.
    prod_step.after(test_step)

In [38]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [39]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (a bare "-i" is not portable);
# the backup file is removed afterwards. The dot in the pattern is escaped so it
# only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [40]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters, the Hydrosphere address and the accuracy threshold.
# The keys are the dash-separated forms of the pipeline_definition argument names.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "acceptable-accuracy": "0.90",
    }
)

Starting a new run with the name quizzical_dubinsky


# Production traffic

In [None]:
# NOTE(review): simulate_production_traffic is not defined in this notebook;
# presumably it is provided by `from utils import *` — confirm.
simulate_production_traffic(request_delay=0.5, shuffle=False)

# Sample

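Once we have a fully functional pipeline, we would like to automate running it. In this section the `download` step is replaced by a `sample` step, which additionally receives the Hydrosphere address and produces the data path used by the later steps.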

Build and publish image

In [41]:
%%bash 
# Abort on the first failure so a failed build is not followed by a push of a stale image.
set -e
docker build -t tidylobster/mnist-pipeline-sample:latest --no-cache 01_sample/
docker push tidylobster/mnist-pipeline-sample:latest

Sending build context to Docker daemon  145.4kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./sample.py /src/
 ---> d0bf9fa1e60f
Step 3/4 : WORKDIR /src/
 ---> Running in 74cf2db395d9
Removing intermediate container 74cf2db395d9
 ---> d897843f8920
Step 4/4 : ENTRYPOINT [ "python", "sample.py" ]
 ---> Running in 600c6e2c5138
Removing intermediate container 600c6e2c5138
 ---> b8eab9e6ea5b
Successfully built b8eab9e6ea5b
Successfully tagged tidylobster/mnist-pipeline-sample:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-sample]
0d3f17e845d9: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
0fc100fdc7f9: Waiting
68dda0c9a8cd: Waiting
93ed1238773c: Waiting
f67191ae09b8: Waiting
b2fd8b4

### Pipeline

Define container operation

In [43]:
def sample_op(**kwargs):
    """Create the "sample" pipeline step.

    Keyword Args:
        mount_path: Passed as ``--mount-path``.
        hydrosphere_address: Passed as ``--hydrosphere-address``.

    Returns:
        The ``dsl.ContainerOp`` with the shared storage volume attached and a
        ``data_path`` output read from ``/data_path.txt``.
    """
    container_args = [
        "--mount-path", kwargs["mount_path"],
        "--hydrosphere-address", kwargs["hydrosphere_address"],
    ]

    step = dsl.ContainerOp(
        name="sample",
        image=f"{username}/mnist-pipeline-sample:latest",
        arguments=container_args,
        file_outputs={"data_path": "/data_path.txt"},
    )

    # Attach the shared storage volume and its mount to the step.
    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    return step

In [50]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    acceptable_accuracy="0.90",
):
    # Steps are chained in order:
    # sample -> train -> release -> deploy_to_stage -> test -> deploy_to_prod.
    # The sample step takes the place of the download step and supplies the
    # data_path output consumed by the train, release and test steps.
    hyperparams = dict(learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)

    sample_step = sample_op(
        mount_path=mount_path,
        hydrosphere_address=hydrosphere_address,
    )

    train_step = train_op(sample_step, mount_path=mount_path, **hyperparams)
    train_step.after(sample_step)
    # Resource requests for the training container.
    train_step.set_memory_request("1G")
    train_step.set_cpu_request("1")

    release_step = release_op(
        sample_step,
        train_step,
        mount_path=mount_path,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
        **hyperparams
    )
    release_step.after(train_step)

    stage_step = deploy_to_stage_op(
        release_step,
        hydrosphere_address=hydrosphere_address,
        model_name=model_name,
    )
    stage_step.after(release_step)

    test_step = test_op(
        sample_step,
        stage_step,
        mount_path=mount_path,
        hydrosphere_address=hydrosphere_address,
        acceptable_accuracy=acceptable_accuracy,
        model_name=model_name,
    )
    # The test step is configured with a retry count of 3.
    test_step.set_retry(3)
    test_step.after(stage_step)

    prod_step = deploy_to_prod_op(
        release_step,
        model_name=model_name,
        hydrosphere_address=hydrosphere_address,
    )
    # Production deployment consumes the release output but is ordered after the test step.
    prod_step.after(test_step)

In [51]:
# Compile the current pipeline_definition into the "pipeline.tar.gz" archive.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [52]:
%%bash 

# Fail early instead of silently substituting an empty namespace.
: "${NAMESPACE:?NAMESPACE environment variable must be set}"

tar -xvf pipeline.tar.gz

# Point the extracted workflow at this namespace's minio service and pipeline runner.
# "-i.bak" is accepted by both GNU and BSD/macOS sed (the previous "-i ''" form
# only works with BSD sed); the backup file is removed afterwards. The
# expressions are quoted so the shell cannot split them, and the dot in the
# pattern is escaped so it only matches a literal ".".
sed -i.bak \
    -e "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" \
    -e "s/pipeline-runner/${NAMESPACE}-pipeline-runner/g" \
    pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [53]:
# Start a run of "pipeline.yaml" under a randomly generated name, passing the
# training hyperparameters, the Hydrosphere address and the accuracy threshold.
# The keys are the dash-separated forms of the pipeline_definition argument names.
# NOTE(review): "acceptable-accuracy" is "0.30" here versus "0.90" in the
# earlier runs — confirm this is intentional.
run_name = namesgenerator.get_random_name()
print("Starting a new run with the name {}".format(run_name))
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "acceptable-accuracy": "0.30",
    }
)

Starting a new run with the name serene_perlman
