Skip to content

Commit

Permalink
Merge pull request #2842 from RafalSkolasinski/rclone-powered-batch
Browse files Browse the repository at this point in the history
Rclone powered batch
  • Loading branch information
axsaucedo authored Jan 15, 2021
2 parents f7e4e19 + 294df22 commit 2e5f30d
Show file tree
Hide file tree
Showing 10 changed files with 576 additions and 285 deletions.
3 changes: 3 additions & 0 deletions doc/source/examples/argo_workflows_hdfs_batch.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../../examples/batch/hdfs-argo-workflows/hdfs-batch.ipynb"
}
3 changes: 2 additions & 1 deletion doc/source/examples/notebooks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ Batch Processing with Seldon Core
.. toctree::
:titlesonly:

Batch Processing with Argo Workflows <argo_workflows_batch>
Batch Processing with Argo Workflows and S3 / Minio <argo_workflows_batch>
Batch Processing with Argo Workflows and HDFS <argo_workflows_hdfs_batch>
Batch Processing with Kubeflow Pipelines <kubeflow_pipelines_batch>


Expand Down
3 changes: 2 additions & 1 deletion doc/source/servers/batch.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ This allows you to leverage Seldon on a large number of batch applications, incl

We have provided a set of examples that show you how you can use the Seldon batch processing component:

* [Batch Processing with Argo Workflows](../examples/argo_workflows_batch.html)
* [Batch Processing with Argo Workflows and S3 / Minio](../examples/argo_workflows_batch.html)
* [Batch Processing with Argo Workflows and HDFS](../examples/argo_workflows_hdfs_batch.html)
* [Batch Processing with Kubeflow Pipelines Example](../examples/kubeflow_pipelines_batch.html)

## High Level Implementation Details
Expand Down
379 changes: 252 additions & 127 deletions examples/batch/argo-workflows-batch/README.ipynb

Large diffs are not rendered by default.

334 changes: 240 additions & 94 deletions examples/batch/argo-workflows-batch/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ keywords:
name: seldon-batch-workflow
sources:
- https://github.com/SeldonIO/seldon-core
version: 0.1
version: 0.2
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ spec:
resources:
requests:
storage: "{{ .Values.pvc.storage }}"
volumes:
- name: rclone-config
secret:
secretName: "{{ .Values.rclone.configSecretName }}"
templates:
- name: seldon-batch-process
steps:
- - name: create-seldon-resource
- - name: create-seldon-resource
template: create-seldon-resource-template
- - name: wait-seldon-resource
template: wait-seldon-resource-template
template: wait-seldon-resource-template
- - name: download-object-store
template: download-object-store-template
- - name: process-batch-inputs
Expand All @@ -39,7 +43,7 @@ spec:
template: upload-object-store-template
- - name: delete-seldon-resource
template: delete-seldon-resource-template

- name: create-seldon-resource-template
metadata:
annotations:
Expand Down Expand Up @@ -84,7 +88,7 @@ spec:
name: classifier
name: default
replicas: {{ .Values.seldonDeployment.replicas }}
- name: wait-seldon-resource-template
metadata:
annotations:
Expand All @@ -96,34 +100,23 @@ spec:
sleep {{ .Values.seldonDeployment.waitTime }}
kubectl rollout status \
deploy/$(kubectl get deploy -l seldon-deployment-id="{{ .Values.seldonDeployment.name }}" -o jsonpath='{.items[0].metadata.name}')
- name: download-object-store-template
metadata:
annotations:
sidecar.istio.io/inject: "false"
script:
image: minio/{{ .Values.minio.minioclient.image }}
env:
- name: MINIO_SERVER_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Values.minio.secret.name }}
key: {{ .Values.minio.secret.keyName.accesskey }}
- name: MINIO_SERVER_ACCESS_SECRET
valueFrom:
secretKeyRef:
name: {{ .Values.minio.secret.name }}
key: {{ .Values.minio.secret.keyName.secretkey }}
- name: MINIO_SERVER_HOST
value: {{ .Values.minio.endpoint }}
image: {{ .Values.rclone.image }}
volumeMounts:
- name: "{{ .Values.pvc.name }}"
mountPath: /assets
- name: rclone-config
mountPath: /config/rclone
readOnly: true
command: [sh]
source: |
mc --insecure config host add minio-local $MINIO_SERVER_HOST $MINIO_SERVER_ACCESS_KEY $MINIO_SERVER_ACCESS_SECRET --api S3v4
mc --insecure cp minio-local/{{ .Values.minio.inputDataPath }} /assets/input-data.txt
rclone copyto {{ .Values.rclone.inputDataPath }} /assets/input-data.txt
- name: process-batch-inputs-template
metadata:
annotations:
Expand All @@ -148,34 +141,24 @@ spec:
--retries "{{ .Values.batchWorker.retries }}" \
--input-data-path "/assets/input-data.txt" \
--output-data-path "/assets/output-data.txt"
- name: upload-object-store-template
metadata:
annotations:
sidecar.istio.io/inject: "false"
script:
image: minio/{{ .Values.minio.minioclient.image }}
image: {{ .Values.rclone.image }}
volumeMounts:
- name: "{{ .Values.pvc.name }}"
mountPath: /assets
- name: rclone-config
mountPath: /config/rclone
readOnly: true
command: [sh]
env:
- name: MINIO_SERVER_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Values.minio.secret.name }}
key: {{ .Values.minio.secret.keyName.accesskey }}
- name: MINIO_SERVER_ACCESS_SECRET
valueFrom:
secretKeyRef:
name: {{ .Values.minio.secret.name }}
key: {{ .Values.minio.secret.keyName.secretkey }}
- name: MINIO_SERVER_HOST
value: {{ .Values.minio.endpoint }}
source: |
mc --insecure config host add minio-local $MINIO_SERVER_HOST $MINIO_SERVER_ACCESS_KEY $MINIO_SERVER_ACCESS_SECRET --api S3v4
mc --insecure cp /assets/output-data.txt minio-local/{{ .Values.minio.outputDataPath }}
rclone copyto /assets/output-data.txt {{ .Values.rclone.outputDataPath }}
- name: delete-seldon-resource-template
metadata:
annotations:
Expand All @@ -186,4 +169,3 @@ spec:
source: |
sleep {{ .Values.seldonDeployment.waitTime }}
kubectl delete seldondeployments -n {{ .Values.workflow.namespace }} {{ .Values.seldonDeployment.name }}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ workflow:
pvc:
# Name of the persistent volume claim to be created
name: seldon-pvc
# Size of the storage volume to be created for the batch job
# Size of the storage volume to be created for the batch job
storage: 2Mi
# Seldon deployment to be created for batch processing
seldonDeployment:
Expand Down Expand Up @@ -54,22 +54,12 @@ batchWorker:
dataType: "data"
# Whether to enable benchmarking on the batch processor worker
enableBenchmark: true
minio:
# Minio Client (mc) Image name
minioclient:
image: mc:RELEASE.2020-10-03T02-54-56Z
# The location of the minio endpoint
endpoint: http://minio.minio-system.svc.cluster.local:9000
# This is the secret that should contain the values to access minio
secret:
# The name of the secret which by default is "minio" but you can create a different one
name: minio
keyName:
# The key name inside that secret to find the access key to authenticate minio
accesskey: accesskey
# The key name inside that secret to find the secret key to authenticate minio
secretkey: secretkey
# The name of the file inside of minio that will contain the batch data to process
inputDataPath: data/input-data.txt
# The name of the file inside of minio that will contain the batch data to process
outputDataPath: data/output-data-{{workflow.uid}}.txt
rclone:
# Rclone Image Name
image: rclone/rclone:1.53
# Name of secret containing rclone.conf file
configSecretName: rclone-config-secret
# The rclone source, in remote:path form, of the file that contains the batch data to process
inputDataPath: cluster-minio:data/input-data.txt
# The dest:destpath for rclone to know where to push results of batch processing
outputDataPath: cluster-minio:data/output-data-{{workflow.uid}}.txt
14 changes: 14 additions & 0 deletions examples/batch/argo-workflows-batch/rclone-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Secret that carries the rclone configuration file used by the batch workflow.
# The secret name matches the chart's default `rclone.configSecretName`, and the
# workflow template mounts it read-only at /config/rclone in the download and
# upload steps.
apiVersion: v1
kind: Secret
metadata:
  name: rclone-config-secret
type: Opaque
stringData:
  # The `[cluster-minio]` section defines the remote name used as the prefix of
  # the chart's default `rclone.inputDataPath` / `rclone.outputDataPath` values.
  # NOTE(review): the access key and secret below are stored in plain text and
  # look like example/default MinIO credentials - confirm, and replace them
  # before using this manifest outside of a demo cluster.
  rclone.conf: |
    [cluster-minio]
    type = s3
    provider = minio
    env_auth = false
    access_key_id = minioadmin
    secret_access_key = minioadmin
    endpoint = http://minio.minio-system.svc.cluster.local:9000
29 changes: 29 additions & 0 deletions examples/batch/argo-workflows-batch/role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Namespaced Role named "workflow" granting access to the resources listed
# below: pods, pod logs, apps/deployments and every resource in the
# machinelearning.seldon.io API group.
# NOTE(review): presumably bound to the service account that runs the Argo
# batch workflow - the RoleBinding is not part of this manifest, confirm.
# NOTE(review): every rule uses the "*" verb wildcard; consider narrowing the
# verbs to what the workflow steps actually need.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: workflow
rules:
# Core API group: pods.
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - "*"
# apps API group: deployments.
- apiGroups:
  - "apps"
  resources:
  - deployments
  verbs:
  - "*"
# Core API group: pod logs.
- apiGroups:
  - ""
  resources:
  - pods/log
  verbs:
  - "*"
# Seldon Core API group: all resources.
- apiGroups:
  - machinelearning.seldon.io
  resources:
  - "*"
  verbs:
  - "*"

0 comments on commit 2e5f30d

Please sign in to comment.