Update readiness and liveness

Signed-off-by: Francesco Murdaca <fmurdaca@redhat.com>
AICoE · Sep 29, 2021 · 42c8499 · 42c8499
1 parent 223333e
commit 42c8499
Show file tree

Hide file tree

Showing 7 changed files with 89 additions and 46 deletions.
diff --git a/.aicoe-ci.yaml b/.aicoe-ci.yaml
@@ -68,7 +68,7 @@ overlays:
 
   - name: nm-convert-onnx
     build:
-      base-image: "quay.io/thoth-station/s2i-elyra-custom-notebook:v0.3.3"
+      base-image: "quay.io/thoth-station/s2i-thoth-ubi8-py38:v0.28.0"
       build-source-script: "image:///opt/app-root/builder"
       custom-tag: latest
       build-stratergy: Source
@@ -79,7 +79,7 @@ overlays:
 
   - name: nm-sparseml
     build:
-      base-image: "quay.io/thoth-station/s2i-elyra-custom-notebook:v0.3.3"
+      base-image: "quay.io/thoth-station/s2i-thoth-ubi8-py38:v0.28.0"
       build-source-script: "image:///opt/app-root/builder"
       custom-tag: latest
       build-stratergy: Source

diff --git a/elyra-aidevsecops-tutorial-neural-magic.pipeline b/elyra-aidevsecops-tutorial-neural-magic.pipeline
@@ -139,4 +139,4 @@
     }
   ],
   "schemas": []
-}
+}
diff --git a/manifests/base/deploymentconfig.yaml b/manifests/base/deploymentconfig.yaml
@@ -8,19 +8,23 @@ spec:
   selector:
     service: elyra-aidevsecops-tutorial
     app.kubernetes.io/name: thoth
-    app.kubernetes.io/component: elyra-aidevsecops-tutorial
+    app.kubernetes.io/component: aicoe-gather-metrics-pipeline
     app.kubernetes.io/managed-by: aicoe-thoth-devops
   template:
     metadata:
       labels:
         service: elyra-aidevsecops-tutorial
         app.kubernetes.io/name: thoth
-        app.kubernetes.io/component: elyra-aidevsecops-tutorial
+        app.kubernetes.io/component: aicoe-gather-metrics-pipeline
         app.kubernetes.io/managed-by: aicoe-thoth-devops
     spec:
       containers:
         - name: elyra-aidevsecops-tutorial
           image: quay.io/thoth-station/elyra-aidevsecops-tutorial
+          env:
+            - name: WEB_CONCURRENCY
+              value: "1"
+          imagePullPolicy: Always
           ports:
             - containerPort: 8080
               protocol: TCP
@@ -31,20 +35,20 @@ spec:
             limits:
               memory: "768Mi"
               cpu: "100m"
-          readinessProbe:
+          livenessProbe:
             httpGet:
-              path: "/metrics"
+              path: /liveness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 30
-            periodSeconds: 10
+            initialDelaySeconds: 5
+            periodSeconds: 60
             timeoutSeconds: 10
-          livenessProbe:
+          readinessProbe:
             httpGet:
-              path: "/metrics"
+              path: /readiness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 45
+            initialDelaySeconds: 5
             periodSeconds: 10
             timeoutSeconds: 10
   test: false
diff --git a/manifests/overlays/inference/deploymentconfig.yaml b/manifests/overlays/inference/deploymentconfig.yaml
@@ -8,19 +8,23 @@ spec:
   selector:
     service: elyra-aidevsecops-tutorial
     app.kubernetes.io/name: thoth
-    app.kubernetes.io/component: elyra-aidevsecops-tutorial
+    app.kubernetes.io/component: aicoe-gather-metrics-pipeline
     app.kubernetes.io/managed-by: aicoe-thoth-devops
   template:
     metadata:
       labels:
         service: elyra-aidevsecops-tutorial
         app.kubernetes.io/name: thoth
-        app.kubernetes.io/component: elyra-aidevsecops-tutorial
+        app.kubernetes.io/component: aicoe-gather-metrics-pipeline
         app.kubernetes.io/managed-by: aicoe-thoth-devops
     spec:
       containers:
         - name: elyra-aidevsecops-tutorial
           image: quay.io/thoth-station/elyra-aidevsecops-tutorial
+          env:
+            - name: WEB_CONCURRENCY
+              value: "1"
+          imagePullPolicy: Always
           ports:
             - containerPort: 8080
               protocol: TCP
@@ -31,20 +35,20 @@ spec:
             limits:
               memory: "768Mi"
               cpu: "100m"
-          readinessProbe:
+          livenessProbe:
             httpGet:
-              path: "/metrics"
+              path: /liveness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 30
-            periodSeconds: 10
+            initialDelaySeconds: 5
+            periodSeconds: 60
             timeoutSeconds: 10
-          livenessProbe:
+          readinessProbe:
             httpGet:
-              path: "/metrics"
+              path: /readiness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 45
+            initialDelaySeconds: 5
             periodSeconds: 10
             timeoutSeconds: 10
   test: false
diff --git a/manifests/overlays/nm-inference/deploymentconfig.yaml b/manifests/overlays/nm-inference/deploymentconfig.yaml
@@ -8,24 +8,27 @@ spec:
   selector:
     service: neural-magic-deepsparse
     app.kubernetes.io/name: thoth
-    app.kubernetes.io/component: neural-magic-deepsparse
+    app.kubernetes.io/component: aicoe-gather-metrics-pipeline
     app.kubernetes.io/managed-by: aicoe-thoth-devops
   template:
     metadata:
       labels:
         service: neural-magic-deepsparse
         app.kubernetes.io/name: thoth
-        app.kubernetes.io/component: neural-magic-deepsparse
+        app.kubernetes.io/component: aicoe-gather-metrics-pipeline
         app.kubernetes.io/managed-by: aicoe-thoth-devops
     spec:
       containers:
         - name: neural-magic-deepsparse
           image: quay.io/thoth-station/neural-magic-deepsparse
           env:
+            - name: WEB_CONCURRENCY
+              value: "1"
             - name: TUTORIAL_USE_NEURAL_MAGIC
               value: "1"
             - name: THOTH_AIDEVSECOPS_MODEL_VERSION
               value: "torch-210921164335-c352fe9b17e2f837_mnist_classification_pruned"
+          imagePullPolicy: Always
           ports:
             - containerPort: 8080
               protocol: TCP
@@ -36,20 +39,20 @@ spec:
             limits:
               memory: "768Mi"
               cpu: "100m"
-          readinessProbe:
+          livenessProbe:
             httpGet:
-              path: "/metrics"
+              path: /liveness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 30
-            periodSeconds: 10
+            initialDelaySeconds: 5
+            periodSeconds: 60
             timeoutSeconds: 10
-          livenessProbe:
+          readinessProbe:
             httpGet:
-              path: "/metrics"
+              path: /readiness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 45
+            initialDelaySeconds: 5
             periodSeconds: 10
             timeoutSeconds: 10
   test: false
diff --git a/manifests/overlays/pytorch-inference/deploymentconfig.yaml b/manifests/overlays/pytorch-inference/deploymentconfig.yaml
@@ -8,24 +8,27 @@ spec:
   selector:
     service: pytorch-inference
     app.kubernetes.io/name: thoth
-    app.kubernetes.io/component: pytorch-inference
+    app.kubernetes.io/component: aicoe-gather-metrics-pipeline
     app.kubernetes.io/managed-by: aicoe-thoth-devops
   template:
     metadata:
       labels:
         service: pytorch-inference
         app.kubernetes.io/name: thoth
-        app.kubernetes.io/component: pytorch-inference
+        app.kubernetes.io/component: aicoe-gather-metrics-pipeline
         app.kubernetes.io/managed-by: aicoe-thoth-devops
     spec:
       containers:
         - name: pytorch-inference
           image: quay.io/thoth-station/elyra-aidevsecops-pytorch-inference
           env:
+            - name: WEB_CONCURRENCY
+              value: "1"
             - name: USE_PYTORCH
               value: "1"
             - name: THOTH_AIDEVSECOPS_MODEL_VERSION
               value: "torch-210921163030-5341ad0f6f389a55"
+          imagePullPolicy: Always
           ports:
             - containerPort: 8080
               protocol: TCP
@@ -36,20 +39,20 @@ spec:
             limits:
               memory: "768Mi"
               cpu: "100m"
-          readinessProbe:
+          livenessProbe:
             httpGet:
-              path: "/metrics"
+              path: /liveness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 30
-            periodSeconds: 10
+            initialDelaySeconds: 5
+            periodSeconds: 60
             timeoutSeconds: 10
-          livenessProbe:
+          readinessProbe:
             httpGet:
-              path: "/metrics"
+              path: /readiness
               port: 8080
               scheme: HTTP
-            initialDelaySeconds: 45
+            initialDelaySeconds: 5
             periodSeconds: 10
             timeoutSeconds: 10
   test: false
diff --git a/wsgi.py b/wsgi.py
@@ -29,14 +29,11 @@
 from flask import Flask
 from flask import request
 from flask import redirect
+from flask import jsonify
 
 from prometheus_flask_exporter import PrometheusMetrics
 from prometheus_client import generate_latest
 
-from src.model import Model as TensorflowModel
-from src.pytorch_model import Model as PytorchModel
-from src.neuralmagic import Model as NeuralMagicModel
-
 _LOGGER = logging.getLogger("aidevsecops-tutorial")
 _LOGGER.info("Thoth AIDevSecOps Tutorial v%s", __version__)
 
@@ -61,10 +58,15 @@
 )
 
 if USE_NEURAL_MAGIC:
-    model = NeuralMagicModel()
+    from src.neural_magic_model import Model as NeuralMagicModel
+
+    nm_model = NeuralMagicModel()
 elif USE_PYTORCH:
-    model = PytorchModel()
+    from src.pytorch_model import Model as PytorchModel
+
+    pytorch_model = PytorchModel()
 else:
+    from src.model import Model as TensorflowModel
     model = TensorflowModel()
 
 # custom metric to expose model version
@@ -95,13 +97,40 @@ def main():
     return redirect(_REDIRECT_URL, code=308)
 
 
+def _healthiness():
+    return (
+        jsonify({"status": "ready", "version": __version__}),
+        200,
+        {"ContentType": "application/json"},
+    )
+
+
+@application.route("/readiness")
+def api_readiness():
+    """Report readiness for the OpenShift readiness probe.
+
+    Returns whatever ``_healthiness()`` builds, unchanged.
+    """
+    return _healthiness()
+
+
+@application.route("/liveness")
+def api_liveness():
+    """Report liveness for the OpenShift liveness probe.
+
+    Returns whatever ``_healthiness()`` builds, unchanged.
+    """
+    return _healthiness()
+
+
 @application.route("/predict", methods=["POST"])
 def predict():
     """Evaluate prediction."""
     image = request.get_json()["inputs"]
 
     start = time.monotonic()
-    prediction, probability = model.predict(image=image)
+
+    if USE_NEURAL_MAGIC:
+        prediction, probability = nm_model.predict(image=image)
+    elif USE_PYTORCH:
+        prediction, probability = pytorch_model.predict(image=image)
+    else:
+        prediction, probability = model.predict(image=image)
+
     latency = time.monotonic() - start
 
     return json.dumps(