Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding GPT2 Example #192

Merged
merged 12 commits into from
Sep 9, 2021
269 changes: 144 additions & 125 deletions docs/examples/logging-insights/README.ipynb

Large diffs are not rendered by default.

191 changes: 103 additions & 88 deletions docs/examples/logging-insights/README.md

Large diffs are not rendered by default.

518 changes: 439 additions & 79 deletions docs/examples/mab-thompson-sampling-tempo/README.ipynb

Large diffs are not rendered by default.

234 changes: 228 additions & 6 deletions docs/examples/mab-thompson-sampling-tempo/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/examples/multi-model-gpt2-triton-pipeline/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rclone.conf
832 changes: 832 additions & 0 deletions docs/examples/multi-model-gpt2-triton-pipeline/README.ipynb

Large diffs are not rendered by default.

473 changes: 473 additions & 0 deletions docs/examples/multi-model-gpt2-triton-pipeline/README.md

Large diffs are not rendered by default.

Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Kustomization for the example: places every resource in the `production`
# namespace and applies a JSON6902 patch to the `classifier` SeldonDeployment.

# Adds namespace to all resources.
namespace: production

resources:
  - tempo.yaml

# JSON Patch (RFC 6902) applied to the resource selected by `target`;
# the patch operations themselves live in the file named by `path`.
patchesJson6902:
  - target:
      group: machinelearning.seldon.io
      version: v1
      kind: SeldonDeployment
      name: classifier
    path: patch_resources.yaml



Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# JSON6902 patch: adds a componentSpecs entry to the first predictor that
# sets CPU/memory requests and limits on the `classifier` container.
- op: add
  path: /spec/predictors/0/componentSpecs
  value:
    - spec:
        containers:
          - name: classifier
            resources:
              requests:
                cpu: 500m
                memory: 500Mi
              limits:
                cpu: 1
                memory: 1Gi

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Secret carrying rclone S3 remote settings as RCLONE_CONFIG_S3_* entries,
# pointing at an in-cluster MinIO endpoint.
# NOTE(review): the access key and secret key are hard-coded in plain text;
# presumably acceptable only for a local demo - confirm before reusing.
apiVersion: v1
kind: Secret
metadata:
  name: minio-secret
type: Opaque
stringData:
  RCLONE_CONFIG_S3_TYPE: s3
  RCLONE_CONFIG_S3_PROVIDER: minio
  # Quoted so the value stays the string "false" rather than a boolean.
  RCLONE_CONFIG_S3_ENV_AUTH: "false"
  RCLONE_CONFIG_S3_ACCESS_KEY_ID: minioadmin
  RCLONE_CONFIG_S3_SECRET_ACCESS_KEY: minioadmin
  RCLONE_CONFIG_S3_ENDPOINT: http://minio.minio-system.svc.cluster.local:9000
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
---
# Service account referenced by the RoleBinding below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: tempo-pipeline
---
# Read-only access (`get`) to the status subresource of SeldonDeployments
# and KFServing InferenceServices.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: tempo-pipeline
rules:
  - apiGroups:
      - machinelearning.seldon.io
    resources:
      - seldondeployments/status
    verbs:
      - get
  - apiGroups:
      - serving.kubeflow.org
    resources:
      - inferenceservices/status
    verbs:
      - get
---
# Grants the `tempo-pipeline` Role to the `tempo-pipeline` ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: tempo-pipeline-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: tempo-pipeline
subjects:
  - kind: ServiceAccount
    name: tempo-pipeline
140 changes: 140 additions & 0 deletions docs/examples/multi-model-gpt2-triton-pipeline/k8s/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# SeldonDeployment `classifier`: a TEMPO_SERVER graph node loaded from
# s3://tempo/basic/pipeline, with an explicit seldonio/mlserver container
# configured through MLSERVER_* environment variables.
# NOTE(review): the tempo-model annotation embeds an absolute local_folder
# path from the machine that generated this manifest.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  annotations:
    seldon.io/tempo-description: A pipeline to use either an sklearn or xgboost model
      for Iris classification
    seldon.io/tempo-model: '{"model_details": {"name": "classifier", "local_folder":
      "/home/clive/work/mlops/fork-tempo/docs/examples/multi-model/artifacts/classifier",
      "uri": "s3://tempo/basic/pipeline", "platform": "tempo", "inputs": {"args":
      [{"ty": "numpy.ndarray", "name": "payload"}]}, "outputs": {"args": [{"ty": "numpy.ndarray",
      "name": null}, {"ty": "builtins.str", "name": null}]}, "description": "A pipeline
      to use either an sklearn or xgboost model for Iris classification"}, "protocol":
      "tempo.kfserving.protocol.KFServingV2Protocol", "runtime_options": {"runtime":
      "tempo.seldon.SeldonKubernetesRuntime", "state_options": {"state_type": "LOCAL",
      "key_prefix": "", "host": "", "port": ""}, "insights_options": {"worker_endpoint":
      "", "batch_size": 1, "parallelism": 1, "retries": 3, "window_time": 0, "mode_type":
      "NONE", "in_asyncio": false}, "ingress_options": {"ingress": "tempo.ingress.istio.IstioIngress",
      "ssl": false, "verify_ssl": true}, "replicas": 1, "minReplicas": null, "maxReplicas":
      null, "authSecretName": "minio-secret", "serviceAccountName": null, "add_svc_orchestrator":
      false, "namespace": "production"}}'
  labels:
    seldon.io/tempo: 'true'
  name: classifier
  namespace: production
spec:
  predictors:
    - annotations:
        seldon.io/no-engine: 'true'
      componentSpecs:
        - spec:
            containers:
              - args: []
                env:
                  - name: MLSERVER_HTTP_PORT
                    value: '9000'
                  - name: MLSERVER_GRPC_PORT
                    value: '9500'
                  - name: MLSERVER_MODEL_IMPLEMENTATION
                    value: tempo.mlserver.InferenceRuntime
                  - name: MLSERVER_MODEL_NAME
                    value: classifier
                  - name: MLSERVER_MODEL_URI
                    value: /mnt/models
                  # JSON-encoded runtime options passed to the container as
                  # a single string.
                  - name: TEMPO_RUNTIME_OPTIONS
                    value: '{"runtime": "tempo.seldon.SeldonKubernetesRuntime", "state_options":
                      {"state_type": "LOCAL", "key_prefix": "", "host": "", "port": ""}, "insights_options":
                      {"worker_endpoint": "", "batch_size": 1, "parallelism": 1, "retries":
                      3, "window_time": 0, "mode_type": "NONE", "in_asyncio": true}, "ingress_options":
                      {"ingress": "tempo.ingress.istio.IstioIngress", "ssl": false, "verify_ssl":
                      true}, "replicas": 1, "minReplicas": null, "maxReplicas": null, "authSecretName":
                      "minio-secret", "serviceAccountName": null, "add_svc_orchestrator":
                      false, "namespace": "production"}'
                image: seldonio/mlserver:0.3.2
                name: classifier
      graph:
        envSecretRefName: minio-secret
        implementation: TEMPO_SERVER
        modelUri: s3://tempo/basic/pipeline
        name: classifier
        serviceAccountName: tempo-pipeline
        type: MODEL
      name: default
      replicas: 1
  protocol: kfserving

---
# SeldonDeployment `test-iris-sklearn`: a single SKLEARN_SERVER graph node
# loaded from s3://tempo/basic/sklearn using the `minio-secret` credentials.
# NOTE(review): the tempo-model annotation embeds an absolute local_folder
# path from the machine that generated this manifest.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  annotations:
    seldon.io/tempo-description: An SKLearn Iris classification model
    seldon.io/tempo-model: '{"model_details": {"name": "test-iris-sklearn", "local_folder":
      "/home/clive/work/mlops/fork-tempo/docs/examples/multi-model/artifacts/sklearn",
      "uri": "s3://tempo/basic/sklearn", "platform": "sklearn", "inputs": {"args":
      [{"ty": "numpy.ndarray", "name": null}]}, "outputs": {"args": [{"ty": "numpy.ndarray",
      "name": null}]}, "description": "An SKLearn Iris classification model"}, "protocol":
      "tempo.kfserving.protocol.KFServingV2Protocol", "runtime_options": {"runtime":
      "tempo.seldon.SeldonKubernetesRuntime", "state_options": {"state_type": "LOCAL",
      "key_prefix": "", "host": "", "port": ""}, "insights_options": {"worker_endpoint":
      "", "batch_size": 1, "parallelism": 1, "retries": 3, "window_time": 0, "mode_type":
      "NONE", "in_asyncio": false}, "ingress_options": {"ingress": "tempo.ingress.istio.IstioIngress",
      "ssl": false, "verify_ssl": true}, "replicas": 1, "minReplicas": null, "maxReplicas":
      null, "authSecretName": "minio-secret", "serviceAccountName": null, "add_svc_orchestrator":
      false, "namespace": "production"}}'
  labels:
    seldon.io/tempo: 'true'
  name: test-iris-sklearn
  namespace: production
spec:
  predictors:
    - annotations:
        seldon.io/no-engine: 'true'
      graph:
        envSecretRefName: minio-secret
        implementation: SKLEARN_SERVER
        modelUri: s3://tempo/basic/sklearn
        name: test-iris-sklearn
        type: MODEL
      name: default
      replicas: 1
  protocol: kfserving

---
# SeldonDeployment `test-iris-xgboost`: a single XGBOOST_SERVER graph node
# loaded from s3://tempo/basic/xgboost using the `minio-secret` credentials.
# NOTE(review): the tempo-model annotation embeds an absolute local_folder
# path from the machine that generated this manifest.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  annotations:
    seldon.io/tempo-description: An XGBoost Iris classification model
    seldon.io/tempo-model: '{"model_details": {"name": "test-iris-xgboost", "local_folder":
      "/home/clive/work/mlops/fork-tempo/docs/examples/multi-model/artifacts/xgboost",
      "uri": "s3://tempo/basic/xgboost", "platform": "xgboost", "inputs": {"args":
      [{"ty": "numpy.ndarray", "name": null}]}, "outputs": {"args": [{"ty": "numpy.ndarray",
      "name": null}]}, "description": "An XGBoost Iris classification model"}, "protocol":
      "tempo.kfserving.protocol.KFServingV2Protocol", "runtime_options": {"runtime":
      "tempo.seldon.SeldonKubernetesRuntime", "state_options": {"state_type": "LOCAL",
      "key_prefix": "", "host": "", "port": ""}, "insights_options": {"worker_endpoint":
      "", "batch_size": 1, "parallelism": 1, "retries": 3, "window_time": 0, "mode_type":
      "NONE", "in_asyncio": false}, "ingress_options": {"ingress": "tempo.ingress.istio.IstioIngress",
      "ssl": false, "verify_ssl": true}, "replicas": 1, "minReplicas": null, "maxReplicas":
      null, "authSecretName": "minio-secret", "serviceAccountName": null, "add_svc_orchestrator":
      false, "namespace": "production"}}'
  labels:
    seldon.io/tempo: 'true'
  name: test-iris-xgboost
  namespace: production
spec:
  predictors:
    - annotations:
        seldon.io/no-engine: 'true'
      graph:
        envSecretRefName: minio-secret
        implementation: XGBOOST_SERVER
        modelUri: s3://tempo/basic/xgboost
        name: test-iris-xgboost
        type: MODEL
      name: default
      replicas: 1
  protocol: kfserving

---
3 changes: 3 additions & 0 deletions docs/examples/outlier/artifacts/outlier/model-settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"parallel_workers": 0
}
16 changes: 8 additions & 8 deletions tempo/kfserving/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ def get_ty(name: str, idx: int, tys: ModelDataArgs) -> Optional[Type]:
ty = tys[name]
if ty is None:
ty = tys[idx]
# if ty is None:
# return np.ndarray
if ty is None:
return np.ndarray
return ty

def to_protocol_response(self, model_details: ModelDetails, *args, **kwargs) -> Dict:
Expand Down Expand Up @@ -132,11 +132,11 @@ def from_protocol_request(self, res: Dict, tys: ModelDataArgs) -> Any:
for idx, input in enumerate(res["inputs"]):
ty = KFServingV2Protocol.get_ty(input["name"], idx, tys)

if ty == np.ndarray:
if input["datatype"] == "BYTES":
inp[input["name"]] = KFServingV2Protocol.convert_from_bytes(input, ty)
elif ty == np.ndarray:
arr = KFServingV2Protocol.create_np_from_v2(input["data"], input["datatype"], input["shape"])
inp[input["name"]] = arr
elif input["datatype"] == "BYTES":
inp[input["name"]] = KFServingV2Protocol.convert_from_bytes(input, ty)
else:
raise ValueError(f"Unknown ty {ty} in conversion")

Expand All @@ -150,11 +150,11 @@ def from_protocol_response(self, res: Dict, tys: ModelDataArgs) -> Any:
for idx, output in enumerate(res["outputs"]):
ty = KFServingV2Protocol.get_ty(output["name"], idx, tys)

if ty == np.ndarray:
if output["datatype"] == "BYTES":
out[output["name"]] = KFServingV2Protocol.convert_from_bytes(output, ty)
elif ty == np.ndarray:
arr = KFServingV2Protocol.create_np_from_v2(output["data"], output["datatype"], output["shape"])
out[output["name"]] = arr
elif output["datatype"] == "BYTES":
out[output["name"]] = KFServingV2Protocol.convert_from_bytes(output, ty)
else:
raise ValueError(f"Unknown ty {ty} in conversion")

Expand Down