In [39]:
with open("requirements.txt", "w") as f:
    f.write("kubernetes>=12.0.0\n")
    f.write("kfp==1.8.9\n")
    f.write("opencv-python-headless==4.5.3.56\n")
    f.write("requests\n")
    
!pip install -r requirements.txt  --upgrade --user

Collecting kubernetes>=12.0.0
  Using cached kubernetes-21.7.0-py2.py3-none-any.whl (1.8 MB)


In [35]:
import kfp.dsl as dsl
import kfp
from kfp import components

kfserving_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/kfserving/component.yaml')

@dsl.pipeline(
  name='KFServing pipeline',
  description='A pipeline for KFServing with PVC.'
)
def kfservingPipeline(
    action='apply',
    namespace='kubeflow-user-example-com',
    pvc_name='mnist-pipeline-l892p-newpvc',
    model_name='model21'):
    # mnist model was trained with 10 category, each category represents one single digit

    # specify the model dir located on pvc
    model_pvc_uri = 'pvc://{}/{}/'.format(pvc_name, model_name)
    
    # create inference service resource named by model_name
    isvc_yaml = '''
apiVersion: "serving.kubeflow.org/v1beta1"
kind: "InferenceService"
metadata:
  name: {}
  namespace: {}
spec:
  predictor:
    tensorflow:
      storageUri: {}
      resources:
        limits:
          cpu: "100m"
        requests:
          cpu: "100m"
'''.format(model_name, namespace, model_pvc_uri)
    
    
### canary rollout###
### 10% traffic go to model2 while 90 traffic go to model1 (original)
### set traffic to 100 to use model2 as default model (aka rollout)
### set traffic to 0 to use model1 as default model (aks rollback)

# traffic = 10
#     isvc_canary_yaml = '''
# apiVersion: "serving.kubeflow.org/v1beta1"
# kind: "InferenceService"
# metadata:
#   name: {}
#   namespace: {}
# spec:
#   predictor:
#     canaryTrafficPercent: {}
#     tensorflow:
#       storageUri: {}
# '''.format(model_name, namespace, traffic, model_pvc_uri)
    
    # created kserve resource
    kfserving = kfserving_op(
        action=action,
        inferenceservice_yaml=isvc_yaml
    )

In [36]:
# Compile pipeline
kfp.compiler.Compiler().compile(kfservingPipeline, 'mnist-kserve.zip')

##### you have to REPLACE authervice_session token into yours. Go to Browser -> Developer Console -> Application -> Cookies
![title](../../pipelines/img/cookies.png)

In [38]:
## the following example use python's request to send restapi requests
import json
import requests
import cv2
import numpy as np

# make http request against the model you deployed previously 
model_name = 'tensorflow-example'
auth = 'authservice_session={}'.format('MTY0MTQ0MTY5MnxOd3dBTkZvM1dUWkhXRUZDTmtWSVJGUkVVVlpTVmxaVFZrVklNazVTVFRVMlVWVkRUVkl5VTA0MFdsQlVRa2t5U0ZSSU0wMU9OMUU9fI8LEqSb73HcgHAoDNxqlAp4UScJNlj6FRcbRcHmQULe')
host = '{}.kubeflow-user-example-com.example.com'.format(model_name)
predict_url = 'http://istio-ingressgateway.istio-system/v1/models/{}:predict'.format(model_name)
classnames = ['0','1','2','3','4','5','6','7','8','9']

orig = cv2.imread('5.png')
resized = cv2.resize(orig, (28,28), interpolation = cv2.INTER_AREA)
resized_arr = np.asarray(resized)/255.0
headers = {'Cookie': auth, 'Host': host}
payload={"signature_name": "serving_default", "instances": [resized_arr.tolist()]}
resp = requests.post(predict_url, headers=headers, data=json.dumps(payload))
resp_json = json.loads(resp.content)
for p in resp_json['predictions']:
    print('prediction:', classnames[np.argmax(p)])

prediction: 5
prediction: 2
prediction: 2
