In [1]:
import kfp
import kfp.dsl as dsl 
import kfp.compiler as compiler
import kubernetes.client.models as k8s
import namesgenerator

In [75]:
# Base URLs of the target cluster. `kubeflow_address` is passed to `kfp.Client`
# and `hydrosphere_address` is sent to pipeline runs as the `hydrosphere-address`
# workflow parameter.
kubeflow_address = "http://d3c79316.kubeflow.odsc.k8s.hydrosphere.io"
hydrosphere_address = "http://d3c79316.serving.odsc.k8s.hydrosphere.io"

# Download

At this stage we will obtain all training data for our pipeline.

### Stage Image

Create a working file at `01_download/download.py`

Create Dockerfile

In [2]:
%%bash 
# Write the Dockerfile for the download stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied and
# none of ADD's archive-extraction / URL features are needed.
cat > 01_download/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./download.py /src/
WORKDIR /src/
ENTRYPOINT [ "python", "download.py" ]
EOL

Build & publish an image

In [3]:
%%bash 
# Build the download-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-download:latest"
docker build -t "${IMAGE}" --no-cache 01_download
docker push "${IMAGE}"

Sending build context to Docker daemon  9.728kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./download.py /src/
 ---> f356d8b06daa
Step 3/4 : WORKDIR /src/
 ---> Running in c16a987a259b
Removing intermediate container c16a987a259b
 ---> 88089d296e5d
Step 4/4 : ENTRYPOINT [ "python", "download.py" ]
 ---> Running in 6dd2b48b4cb2
Removing intermediate container 6dd2b48b4cb2
 ---> 8e1c2022dacc
Successfully built 8e1c2022dacc
Successfully tagged tidylobster/mnist-pipeline-download:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-download]
917794055c32: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
0fc100fdc7f9: Waiting

### Pipeline

Create Kubernetes PVC resource

In [4]:
# Reference the PVC named "storage", wrap it in a pod volume, and describe
# where that volume is mounted (the path is a workflow-parameter placeholder).
storage_pvc = k8s.V1PersistentVolumeClaimVolumeSource(
    claim_name="storage",
)
storage_volume = k8s.V1Volume(
    name="storage",
    persistent_volume_claim=storage_pvc,
)
storage_volume_mount = k8s.V1VolumeMount(
    mount_path="{{workflow.parameters.mount-path}}",
    name="storage",
)

Create required environment variables

In [5]:
# MOUNT_PATH env var; its value is the `mount-path` workflow-parameter placeholder.
mount_path_env = k8s.V1EnvVar(
    name="MOUNT_PATH",
    value="{{workflow.parameters.mount-path}}",
)

Define container operation

In [6]:
# Image namespace used to build the image reference in `download_op`.
username = "tidylobster"

In [7]:
def download_op():
    """Build the `download` pipeline step.

    Creates a container op running `{username}/mnist-pipeline-download:latest`,
    attaches the `storage` volume and its mount, and sets the MOUNT_PATH
    environment variable.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    image_ref = f"{username}/mnist-pipeline-download:latest"  # <-- Replace with built docker image
    step = dsl.ContainerOp(name="download", image=image_ref)

    step.add_volume(storage_volume)
    step.add_volume_mount(storage_volume_mount)
    step.add_env_variable(mount_path_env)

    return step

In [8]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
):
    # Single-stage pipeline: download only.
    # `mount_path` is not read here; presumably it becomes the `mount-path`
    # workflow parameter referenced by the op placeholders -- confirm.
    download_op()

In [9]:
# Compile the current `pipeline_definition` into the `pipeline.tar.gz` package.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [17]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [18]:
# Create the Kubeflow Pipelines client against `kubeflow_address`.
# NOTE(review): the client keeps the address it was created with; re-run this
# cell if `kubeflow_address` is changed later -- confirm.
client = kfp.Client(kubeflow_address)

In [19]:
# Name of the experiment that all runs in this notebook are grouped under.
experiment_name='MNIST Showreal'

In [20]:
# Get the id of the existing experiment, or create the experiment if the
# lookup fails.
try:
    experiment_id = client.get_experiment(experiment_name=experiment_name).id
except Exception:
    # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit are no
    # longer swallowed; any ordinary lookup error still falls back to creation.
    experiment_id = client.create_experiment(experiment_name).id

In [21]:
# Generate a random, human-readable name for this run and display it.
run_name = namesgenerator.get_random_name()
run_name

'dreamy_ptolemy'

In [22]:
# Submit a run of the extracted pipeline.yaml under the experiment, using the
# pipeline's default parameter values.
result = client.run_pipeline(experiment_id, run_name, "pipeline.yaml")

# Train

At this stage we will create a model & train it on the downloaded data.

### Stage Image

Create Dockerfile

In [23]:
%%bash 
# Write the Dockerfile for the train stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 02_train/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./train.py /src/
WORKDIR /src/
ENTRYPOINT [ "python", "train.py" ]
EOL

Build and publish image

In [24]:
%%bash 
# Build the train-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-train:latest"
docker build -t "${IMAGE}" --no-cache 02_train/
docker push "${IMAGE}"

Sending build context to Docker daemon  54.59MB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./train.py /src/
 ---> c3d8af72b58b
Step 3/4 : WORKDIR /src/
 ---> Running in 56a3b98c97aa
Removing intermediate container 56a3b98c97aa
 ---> 95cf30a870a7
Step 4/4 : ENTRYPOINT [ "python", "train.py" ]
 ---> Running in ff1b51acfbdc
Removing intermediate container ff1b51acfbdc
 ---> 8e94d53c3631
Successfully built 8e94d53c3631
Successfully tagged tidylobster/mnist-pipeline-train:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-train]
3fd977cad826: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
eeb5ce6b3db4: Waiting
f67191ae09b8: Waiting
886601877ba4: Waiting
b2fd8b4c3da7: Waiting
68dda0c9a8c

### Pipeline

Create required environment variables

In [25]:
# Training hyperparameters exposed to containers as env vars; each value is
# the placeholder of the matching workflow parameter.
learning_rate_env = k8s.V1EnvVar(
    name="LEARNING_RATE",
    value="{{workflow.parameters.learning-rate}}",
)
epochs_env = k8s.V1EnvVar(
    name="EPOCHS",
    value="{{workflow.parameters.epochs}}",
)
batch_size_env = k8s.V1EnvVar(
    name="BATCH_SIZE",
    value="{{workflow.parameters.batch-size}}",
)

Define container operation

In [26]:
def train_op():
    """Build the `train` pipeline step.

    Runs `{username}/mnist-pipeline-train:latest`, mounts the `storage`
    volume, and passes MOUNT_PATH, LEARNING_RATE, EPOCHS and BATCH_SIZE as
    environment variables. The file `/accuracy.txt` is exposed as the
    `accuracy` output.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    train = dsl.ContainerOp(
        name="train",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-train:latest",
        file_outputs={"accuracy": "/accuracy.txt"})

    train.add_volume(storage_volume)
    train.add_volume_mount(storage_volume_mount)
    for env_var in (mount_path_env, learning_rate_env, epochs_env, batch_size_env):
        train.add_env_variable(env_var)

    return train

In [27]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
):
    # Stage order: download -> train.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

In [28]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [29]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [30]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'naughty_shaw'

In [31]:
# Submit a run of pipeline.yaml, passing the training hyperparameters as
# workflow parameters (all values are strings).
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10"
    }
)

# Upload 

At this stage we will upload the model to Hydrosphere

### Stage Image

Create Dockerfile

In [32]:
%%bash 
# Write the Dockerfile for the upload stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 03_upload/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./execute.sh /src/
WORKDIR /src/
ENTRYPOINT [ "bash", "execute.sh" ]
EOL

Build and publish image

In [33]:
%%bash 
# Build the upload-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-upload:latest"
docker build -t "${IMAGE}" --no-cache 03_upload/
docker push "${IMAGE}"

Sending build context to Docker daemon  4.608kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 82b887f31872
Step 3/4 : WORKDIR /src/
 ---> Running in ed41d9c6b3c1
Removing intermediate container ed41d9c6b3c1
 ---> b54df2a34b1e
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in b87b766258d3
Removing intermediate container b87b766258d3
 ---> 264eae903a78
Successfully built 264eae903a78
Successfully tagged tidylobster/mnist-pipeline-upload:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-upload]
b29001f3bfb3: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
0fc100fdc7f9: Waiting
68dda0c9a8cd: Waiting
f67191ae09b8: Waiting
b2fd8b4c3da7: Waiting
0de2edf

### Pipeline

Create required environment variables

In [34]:
# Model name and Hydrosphere cluster address, each taken from the matching
# workflow-parameter placeholder.
model_name_env = k8s.V1EnvVar(
    name="MODEL_NAME",
    value="{{workflow.parameters.model-name}}",
)
hydrosphere_address_env = k8s.V1EnvVar(
    name="CLUSTER_ADDRESS",
    value="{{workflow.parameters.hydrosphere-address}}",
)

Define container operation

In [35]:
def upload_op(train):
    """Build the `upload` pipeline step.

    Runs `{username}/mnist-pipeline-upload:latest` with the train step's
    `accuracy` output as its only argument, mounts the `storage` volume, and
    sets the mount-path, model-name, cluster-address and hyperparameter
    environment variables. The file `/model-version.txt` is exposed as the
    `model-version` output.

    Args:
        train: The train step; its `outputs["accuracy"]` is consumed.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    upload = dsl.ContainerOp(
        name="upload",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-upload:latest",
        file_outputs={"model-version": "/model-version.txt"},
        arguments=[train.outputs["accuracy"]])

    upload.add_volume(storage_volume)
    upload.add_volume_mount(storage_volume_mount)
    for env_var in (
        mount_path_env,
        model_name_env,
        hydrosphere_address_env,
        learning_rate_env,
        epochs_env,
        batch_size_env,
    ):
        upload.add_env_variable(env_var)

    return upload

In [36]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address=""
):
    # Stage order: download -> train -> upload.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

In [37]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [38]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [39]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'keen_borg'

In [40]:
# Submit a run of pipeline.yaml with the hyperparameters and the Hydrosphere
# address (from the notebook-level `hydrosphere_address`) as workflow parameters.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
    }
)

# Predeploy

At this stage we are pre-deploying the application to run integration tests.

### Stage Image

Create Dockerfile

In [41]:
%%bash 
# Write the Dockerfile for the predeploy stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 04_predeploy/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./execute.sh /src/
WORKDIR /src/
ENTRYPOINT [ "bash", "execute.sh" ]
EOL

Build and publish image

In [42]:
%%bash 
# Build the predeploy-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-predeploy:latest"
docker build -t "${IMAGE}" --no-cache 04_predeploy/
docker push "${IMAGE}"

Sending build context to Docker daemon  3.584kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 89b327d5bf19
Step 3/4 : WORKDIR /src/
 ---> Running in dd732bbc6d5d
Removing intermediate container dd732bbc6d5d
 ---> 926efbe8e331
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in 948fb87a76fc
Removing intermediate container 948fb87a76fc
 ---> 4dfbd392404b
Successfully built 4dfbd392404b
Successfully tagged tidylobster/mnist-pipeline-predeploy:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-predeploy]
b6eff90bffd4: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
93ed1238773c: Waiting
0fc100fdc7f9: Waiting
6

### Pipeline

Create required environment variables

In [43]:
# APPLICATION_NAME env var; its value is the `application-name`
# workflow-parameter placeholder.
application_name_env = k8s.V1EnvVar(
    name="APPLICATION_NAME",
    value="{{workflow.parameters.application-name}}",
)

Define container operation

In [44]:
def predeploy_op(upload):
    """Build the `predeploy` pipeline step.

    Runs `{username}/mnist-pipeline-predeploy:latest` with the upload step's
    `model-version` output as its only argument and sets the cluster-address,
    application-name and model-name environment variables. The file
    `/predeploy-app-name.txt` is exposed as the `predeploy-app-name` output.

    Args:
        upload: The upload step; its `outputs["model-version"]` is consumed.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    predeploy = dsl.ContainerOp(
        name="predeploy",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-predeploy:latest",
        arguments=[upload.outputs["model-version"]],
        file_outputs={"predeploy-app-name": "/predeploy-app-name.txt"})

    for env_var in (hydrosphere_address_env, application_name_env, model_name_env):
        predeploy.add_env_variable(env_var)

    return predeploy

In [45]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    application_name="mnist-app"
):
    # Stage order: download -> train -> upload -> predeploy.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

    predeploy_step = predeploy_op(upload_step)
    predeploy_step.after(upload_step)


In [46]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [47]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [48]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'loving_wright'

In [49]:
# Submit a run of pipeline.yaml with the hyperparameters, the Hydrosphere
# address and the application name as workflow parameters.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "application-name": "mnist-app",
    }
)

# Test

At this stage we are performing integration tests.

### Stage Image

Create Dockerfile

In [50]:
%%bash 
# Write the Dockerfile for the test stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 05_test/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./test.py /src/
WORKDIR /src/
ENTRYPOINT [ "python", "test.py" ]
EOL

Build and publish image

In [51]:
%%bash 
# Build the test-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-test:latest"
docker build -t "${IMAGE}" --no-cache 05_test/
docker push "${IMAGE}"

Sending build context to Docker daemon  32.07MB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./test.py /src/
 ---> 0e2ecfd1de04
Step 3/4 : WORKDIR /src/
 ---> Running in 8924ff3a4217
Removing intermediate container 8924ff3a4217
 ---> 4c748258a8b9
Step 4/4 : ENTRYPOINT [ "python", "test.py" ]
 ---> Running in 7b800c509e65
Removing intermediate container 7b800c509e65
 ---> 38594bc1e067
Successfully built 38594bc1e067
Successfully tagged tidylobster/mnist-pipeline-test:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-test]
9599b779b417: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
0fc100fdc7f9: Waiting
68dda0c9a8cd: Waiting
93ed1238773c: Waiting
f67191ae09b8: Waiting
b2fd8b4c3da7: W

### Pipeline

Create required environment variables

In [52]:
# Integration-test settings exposed as env vars; each value is the
# placeholder of the matching workflow parameter.
test_amount_env = k8s.V1EnvVar(
    name="TEST_AMOUNT",
    value="{{workflow.parameters.test-amount}}",
)
requests_delay_env = k8s.V1EnvVar(
    name="REQUESTS_DELAY",
    value="{{workflow.parameters.requests-delay}}",
)
acceptable_accuracy_env = k8s.V1EnvVar(
    name="ACCEPTABLE_ACCURACY",
    value="{{workflow.parameters.acceptable-accuracy}}",
)

Define container operation

In [53]:
def test_op(predeploy):
    """Build the `test` pipeline step.

    Runs `{username}/mnist-pipeline-test:latest` with the predeploy step's
    `predeploy-app-name` output as its only argument, mounts the `storage`
    volume, and sets the mount-path, cluster-address, application-name,
    test-amount, acceptable-accuracy and requests-delay environment variables.

    Args:
        predeploy: The predeploy step; its `outputs["predeploy-app-name"]`
            is consumed.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    test = dsl.ContainerOp(
        name="test",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-test:latest",
        arguments=[predeploy.outputs["predeploy-app-name"]])

    test.add_volume(storage_volume)
    test.add_volume_mount(storage_volume_mount)
    for env_var in (
        mount_path_env,
        hydrosphere_address_env,
        application_name_env,
        test_amount_env,
        acceptable_accuracy_env,
        requests_delay_env,
    ):
        test.add_env_variable(env_var)

    return test

In [54]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    application_name="mnist-app",
    acceptable_accuracy="0.90",
    test_amount="100",
    requests_delay="2",
):
    # Stage order: download -> train -> upload -> predeploy -> test.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

    predeploy_step = predeploy_op(upload_step)
    predeploy_step.after(upload_step)

    # The test step is configured with a retry count of 3.
    test_step = test_op(predeploy_step)
    test_step.set_retry(3)
    test_step.after(predeploy_step)

In [55]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [56]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [57]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'loving_yalow'

In [58]:
# Submit a run of pipeline.yaml. Parameters not listed here (e.g. test-amount,
# requests-delay) are not overridden by this call.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "application-name": "mnist-app",
        "acceptable-accuracy": "0.90",
    }
)

# Remove Predeploy

Once the application has been tested on real data, we remove the pre-deployed application.

### Stage Image

Create Dockerfile

In [59]:
%%bash 
# Write the Dockerfile for the remove-predeploy stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 06_rm-predeploy/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./execute.sh /src/
WORKDIR /src/
ENTRYPOINT [ "bash", "execute.sh" ]
EOL

Build and publish image

In [60]:
%%bash 
# Build the remove-predeploy-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-rm-predeploy:latest"
docker build -t "${IMAGE}" --no-cache 06_rm-predeploy/
docker push "${IMAGE}"

Sending build context to Docker daemon  3.072kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> 6691c33950ba
Step 3/4 : WORKDIR /src/
 ---> Running in e35d9c80c9b5
Removing intermediate container e35d9c80c9b5
 ---> 8e2730ea8ca7
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in 77446322f768
Removing intermediate container 77446322f768
 ---> 9c5b9cb911ae
Successfully built 9c5b9cb911ae
Successfully tagged tidylobster/mnist-pipeline-rm-predeploy:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-rm-predeploy]
48ac0c48bead: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
eeb5ce6b3db4: Waiting
68dda0c9a8cd: Waiting
f67191ae09b8: Waiting
886601877ba4: Wai

### Pipeline

Define container operation

In [61]:
def rm_predeploy_op(predeploy):
    """Build the `remove-predeploy` pipeline step.

    Runs `{username}/mnist-pipeline-rm-predeploy:latest` with the predeploy
    step's `predeploy-app-name` output as its only argument and sets the
    cluster-address environment variable.

    Args:
        predeploy: The predeploy step; its `outputs["predeploy-app-name"]`
            is consumed.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    rm_predeploy = dsl.ContainerOp(
        name="remove-predeploy",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-rm-predeploy:latest",
        arguments=[predeploy.outputs["predeploy-app-name"]])
    rm_predeploy.add_env_variable(hydrosphere_address_env)

    return rm_predeploy

In [62]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    application_name="mnist-app",
    acceptable_accuracy="0.90",
    test_amount="100",
    requests_delay="2",
):
    # Stage order: download -> train -> upload -> predeploy -> test
    # -> remove-predeploy.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

    predeploy_step = predeploy_op(upload_step)
    predeploy_step.after(upload_step)

    # The test step is configured with a retry count of 3.
    test_step = test_op(predeploy_step)
    test_step.set_retry(3)
    test_step.after(predeploy_step)

    # Cleanup is ordered after the test step.
    cleanup_step = rm_predeploy_op(predeploy_step)
    cleanup_step.after(test_step)

In [63]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [64]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [65]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'lucid_jang'

In [66]:
# Submit a run of pipeline.yaml. Parameters not listed here (e.g. test-amount,
# requests-delay) are not overridden by this call.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "application-name": "mnist-app",
        "acceptable-accuracy": "0.90",
    }
)

# Deploy

And finally, we deploy the application to production.

### Stage Image

Create Dockerfile

In [67]:
%%bash 
# Write the Dockerfile for the deploy stage.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 07_deploy/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./execute.sh /src/
WORKDIR /src/
ENTRYPOINT [ "bash", "execute.sh" ]
EOL

Build and publish image

In [68]:
%%bash 
# Build the deploy-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-deploy:latest"
docker build -t "${IMAGE}" --no-cache 07_deploy/
docker push "${IMAGE}"

Sending build context to Docker daemon  3.072kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./execute.sh /src/
 ---> a674f52b4b3f
Step 3/4 : WORKDIR /src/
 ---> Running in b7850268ef2d
Removing intermediate container b7850268ef2d
 ---> c1997014ad75
Step 4/4 : ENTRYPOINT [ "bash", "execute.sh" ]
 ---> Running in 5dda16631a65
Removing intermediate container 5dda16631a65
 ---> f6bb31a46098
Successfully built f6bb31a46098
Successfully tagged tidylobster/mnist-pipeline-deploy:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-deploy]
94675759f65f: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
93ed1238773c: Waiting
eeb5ce6b3db4: Waiting
886601877ba4: Waiting
0fc100fdc7f9: Waiting
b2fd8b4

### Pipeline

Define container operation

In [76]:
def deploy_op(upload):
    """Build the `deploy` pipeline step.

    Runs `{username}/mnist-pipeline-deploy:latest` with the upload step's
    `model-version` output as its only argument and sets the cluster-address,
    application-name and model-name environment variables.

    Args:
        upload: The upload step; its `outputs["model-version"]` is consumed.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    deploy = dsl.ContainerOp(
        name="deploy",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-deploy:latest",
        arguments=[upload.outputs["model-version"]])

    for env_var in (hydrosphere_address_env, application_name_env, model_name_env):
        deploy.add_env_variable(env_var)

    return deploy

In [77]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    application_name="mnist-app",
    acceptable_accuracy="0.90",
    test_amount="100",
    requests_delay="2",
):
    # Stage order: download -> train -> upload -> predeploy -> test, then
    # remove-predeploy and deploy are both ordered after test.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    download_step = download_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(download_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

    predeploy_step = predeploy_op(upload_step)
    predeploy_step.after(upload_step)

    # The test step is configured with a retry count of 3.
    test_step = test_op(predeploy_step)
    test_step.set_retry(3)
    test_step.after(predeploy_step)

    cleanup_step = rm_predeploy_op(predeploy_step)
    cleanup_step.after(test_step)

    deploy_step = deploy_op(upload_step)
    deploy_step.after(test_step)
    

In [78]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [79]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [73]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'festive_stonebraker'

In [74]:
# Submit a run of pipeline.yaml. Parameters not listed here (e.g. test-amount,
# requests-delay) are not overridden by this call.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "application-name": "mnist-app",
        "acceptable-accuracy": "0.90",
    }
)

# Sample

Once we have a fully functional pipeline, we would like to automate running it. For these runs, the pipeline below starts with a `sample` stage in place of the `download` stage.

### Stage Image

Create Dockerfile

In [80]:
%%bash 
# Write the Dockerfile for the sample stage.
# The target directory is `01_sample/`, the same directory the build cell
# passes to `docker build`; writing to `01_sampling/` left the generated
# Dockerfile unused.
# The heredoc delimiter is quoted so the body is written verbatim (no shell
# expansion). COPY replaces ADD because only a plain local file is copied.
cat > 01_sample/Dockerfile << 'EOL'
FROM tidylobster/odsc-base:1.0
COPY ./sample.py /src/
WORKDIR /src/
ENTRYPOINT [ "python", "sample.py" ]
EOL

Build and publish image

In [82]:
%%bash 
# Build the sample-stage image and publish it.
# `set -e` stops the cell before `docker push` if the build fails, so a stale
# `latest` tag is never pushed in place of the image that failed to build.
set -euo pipefail

IMAGE="tidylobster/mnist-pipeline-sample:latest"
docker build -t "${IMAGE}" --no-cache 01_sample/
docker push "${IMAGE}"

Sending build context to Docker daemon  144.9kB
Step 1/4 : FROM tidylobster/odsc-base:1.0
 ---> a44d37e5b862
Step 2/4 : ADD ./sample.py /src/
 ---> c9807cee3f81
Step 3/4 : WORKDIR /src/
 ---> Running in c260f969cb23
Removing intermediate container c260f969cb23
 ---> 2d1d9aa35c0c
Step 4/4 : ENTRYPOINT [ "python", "sample.py" ]
 ---> Running in e407c23c9fe8
Removing intermediate container e407c23c9fe8
 ---> 990363323ae0
Successfully built 990363323ae0
Successfully tagged tidylobster/mnist-pipeline-sample:latest
The push refers to repository [docker.io/tidylobster/mnist-pipeline-sample]
06ec69383254: Preparing
66a75017de07: Preparing
83c720aa6f39: Preparing
56995c671038: Preparing
eee35c27cf87: Preparing
93ed1238773c: Preparing
eeb5ce6b3db4: Preparing
886601877ba4: Preparing
0fc100fdc7f9: Preparing
68dda0c9a8cd: Preparing
f67191ae09b8: Preparing
b2fd8b4c3da7: Preparing
0de2edf7bff4: Preparing
886601877ba4: Waiting
68dda0c9a8cd: Waiting
f67191ae09b8: Waiting
b2fd8b4c3da7: Waiting
0de2edf

### Pipeline

Define container operation

In [83]:
def sample_op():
    """Build the `sample` pipeline step.

    Runs `{username}/mnist-pipeline-sample:latest`, mounts the `storage`
    volume, and sets the mount-path, cluster-address and application-name
    environment variables.

    Returns:
        The configured `dsl.ContainerOp`.
    """
    sample = dsl.ContainerOp(
        name="sample",
        # Image namespace comes from `username`, consistent with `download_op`.
        image=f"{username}/mnist-pipeline-sample:latest")

    sample.add_volume(storage_volume)
    sample.add_volume_mount(storage_volume_mount)
    for env_var in (mount_path_env, hydrosphere_address_env, application_name_env):
        sample.add_env_variable(env_var)

    return sample

In [84]:
@dsl.pipeline(name="mnist", description="MNIST classifier")
def pipeline_definition(
    mount_path="/storage",
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    hydrosphere_address="",
    application_name="mnist-app",
    acceptable_accuracy="0.90",
    test_amount="100",
    requests_delay="2",
):
    # Stage order: sample -> train -> upload -> predeploy -> test, then
    # remove-predeploy and deploy are both ordered after test.
    # The arguments are not read here; presumably they become the workflow
    # parameters referenced by the op placeholders -- confirm.
    sample_step = sample_op()

    train_step = train_op()
    train_step.set_memory_request('2G')
    train_step.set_cpu_request('1')
    train_step.after(sample_step)

    upload_step = upload_op(train_step)
    upload_step.after(train_step)

    predeploy_step = predeploy_op(upload_step)
    predeploy_step.after(upload_step)

    # The test step is configured with a retry count of 3.
    test_step = test_op(predeploy_step)
    test_step.set_retry(3)
    test_step.after(predeploy_step)

    cleanup_step = rm_predeploy_op(predeploy_step)
    cleanup_step.after(test_step)

    deploy_step = deploy_op(upload_step)
    deploy_step.after(test_step)
    

In [85]:
# Re-compile the updated `pipeline_definition` into `pipeline.tar.gz`.
compiler.Compiler().compile(pipeline_definition, "pipeline.tar.gz")

In [86]:
%%bash 

# Unpack the compiled package and rewrite the `minio-service.kubeflow` host to
# `minio-service.<NAMESPACE>` in the extracted pipeline.yaml.
set -euo pipefail

# Fail early instead of silently writing an empty namespace ("minio-service.").
: "${NAMESPACE:?NAMESPACE must be set to the target Kubernetes namespace}"

tar -xvf pipeline.tar.gz
# `-i.bak` (attached suffix) works with both GNU and BSD/macOS sed, unlike `-i ''`.
sed -i.bak "s/minio-service\.kubeflow/minio-service.${NAMESPACE}/g" pipeline.yaml
rm -f pipeline.yaml.bak

x pipeline.yaml


### Test

In [87]:
# Generate a fresh random name for the next run and display it.
run_name = namesgenerator.get_random_name()
run_name

'focused_jones'

In [88]:
# Submit a run of pipeline.yaml. The Hydrosphere address comes from the
# notebook-level `hydrosphere_address` (as in every other run cell) instead of
# a duplicated hard-coded URL, so the address only has to be changed in one place.
# NOTE(review): the recorded output of this cell is a 404 from the server;
# `client` keeps the address it was created with, so re-create it if
# `kubeflow_address` was changed after the client cell ran -- confirm.
result = client.run_pipeline(
    experiment_id, run_name, "pipeline.yaml",
    {
        "learning-rate": "0.01",
        "batch-size": "256",
        "epochs": "10",
        "hydrosphere-address": hydrosphere_address,
        "application-name": "mnist-app",
        "acceptable-accuracy": "0.90",
    }
)

ApiException: (404)
Reason: Not Found
HTTP response headers: HTTPHeaderDict({'Server': 'nginx/1.15.3', 'Date': 'Tue, 23 Apr 2019 10:41:19 GMT', 'Content-Type': 'text/plain; charset=utf-8', 'Content-Length': '21', 'Connection': 'keep-alive'})
HTTP response body: default backend - 404
