# Sample for KFServing SDK 

This is a sample for KFServing SDK. 

This notebook shows how to use the KFServing SDK to create, get, roll out a canary for, promote, and delete an InferenceService.

In [3]:
!pip install kfserving==0.3.0.1 --user

Collecting kfserving==0.3.0.1
[?25l  Downloading https://files.pythonhosted.org/packages/8c/61/8d826af3f8554f520eb8f2b4716bab29a3d924098669554672d346e71b9b/kfserving-0.3.0.1-py3-none-any.whl (83kB)
[K     |████████████████████████████████| 92kB 343kB/s eta 0:00:011
[?25hCollecting adal>=1.2.2
[?25l  Downloading https://files.pythonhosted.org/packages/4f/b5/3ea9ae3d1096b9ff31e8f1846c47d49f3129a12464ac0a73b602de458298/adal-1.2.2-py2.py3-none-any.whl (53kB)
[K     |████████████████████████████████| 61kB 495kB/s eta 0:00:011
Collecting table-logger>=0.3.5
  Downloading https://files.pythonhosted.org/packages/0d/80/d4e0e9005caccae0185d4919844792a9227ebf4ed8e431c13625556b8c74/table_logger-0.3.6-py3-none-any.whl
Collecting azure-storage-blob<=2.1.0,>=1.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/3e/84/610f379b46d7d3c2d48eadeed6a12b6d46a43100fea70534f5992d0ac996/azure_storage_blob-2.1.0-py2.py3-none-any.whl (88kB)
[K     |████████████████████████████████| 92kB 138kB/s

In [None]:
# Restart the kernel so the freshly pip-installed libraries become importable.
# NOTE(review): the script relies on the `Jupyter.notebook` JavaScript object;
# presumably it only takes effect in front ends that expose it -- otherwise
# restart the kernel manually before continuing.
from IPython.display import HTML  # documented public import path for HTML

# Must stay the last expression of the cell so the HTML object is rendered.
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [1]:
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

Define the namespace that the InferenceService will be deployed to. If the SDK is running inside the cluster, the function below returns the current namespace; otherwise it falls back to the `default` namespace.

In [2]:
# Ask the SDK for its default target namespace and print it, so the value used
# by the rest of the notebook is visible in the cell output.
namespace = utils.get_default_target_namespace()
print(namespace)

anonymous


## Define InferenceService

First define the default endpoint spec, and then define the InferenceService based on that endpoint spec.

In [13]:
# API version string of the InferenceService resource: "<group>/<version>".
api_version = '/'.join([constants.KFSERVING_GROUP, constants.KFSERVING_VERSION])

# Resource requests and limits are set to the same CPU and memory figures.
default_resources = V1ResourceRequirements(
    requests={'cpu': '100m', 'memory': '0.5Gi'},
    limits={'cpu': '100m', 'memory': '0.5Gi'},
)

# TensorFlow predictor that loads the sample "flowers" model from its storage URI.
default_tensorflow = V1alpha2TensorflowSpec(
    storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
    resources=default_resources,
)

# Default endpoint: a predictor wrapping the TensorFlow spec above.
default_endpoint_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(tensorflow=default_tensorflow),
)

# The InferenceService object itself, named 'flower-sample' in the target namespace.
isvc_metadata = client.V1ObjectMeta(name='flower-sample', namespace=namespace)
isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=isvc_metadata,
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
)

## Create InferenceService

Call KFServingClient to create InferenceService.

In [14]:
# Instantiate the SDK client with its default arguments; the same client object
# is reused by the later cells of this notebook.
KFServing = KFServingClient()
# Submit the InferenceService definition held in `isvc`. As the last expression
# of the cell, the call's return value is rendered as the cell output.
KFServing.create(isvc)

{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-04-03T17:45:46Z',
  'generation': 1,
  'name': 'flower-sample',
  'namespace': 'anonymous',
  'resourceVersion': '20829',
  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/anonymous/inferenceservices/flower-sample',
  'uid': 'f29c143b-75d2-11ea-bf9e-0242ac110020'},
 'spec': {'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '100m',
       'memory': '512Mi'},
      'requests': {'cpu': '100m', 'memory': '512Mi'}},
     'runtimeVersion': '1.14.0',
     'storageUri': 'gs://kfserving-samples/models/tensorflow/flowers'}}}},
 'status': {}}

## Check the InferenceService

In [18]:
!kubectl get inferenceservices -n $namespace

NAME            URL   READY   DEFAULT TRAFFIC   CANARY TRAFFIC   AGE
flower-sample         False                                      13m


In [None]:
# Fetch the 'flower-sample' InferenceService with watch mode enabled and a
# 120-second timeout.
KFServing.get(
    'flower-sample',
    namespace=namespace,
    watch=True,
    timeout_seconds=120,
)

NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
flower-sample        False                                                                                        


## Run a prediction

### Get Istio-Ingressgateway Host IP and Node Port

In [None]:
%%bash
# Node port of the port named "http2" on the istio-ingressgateway service.
export NODE_PORT=$(kubectl get service istio-ingressgateway \
    --namespace istio-system \
    --output jsonpath="{.spec.ports[?(@.name=='http2')].nodePort}")
echo ${NODE_PORT}

# Host IP of the first pod labelled istio=ingressgateway.
export HOST_IP=$(kubectl get pods \
    --selector istio=ingressgateway \
    --namespace istio-system \
    --output jsonpath='{.items[0].status.hostIP}')
echo ${HOST_IP}

### Run sample prediction 

In [None]:
%%bash -s "$namespace"
# Send a sample prediction request to the InferenceService through the Istio
# ingress gateway.
#
# IPython expands the Python variable `namespace` on the magic line and hands it
# to bash as the first positional argument, so this cell follows whatever
# namespace the notebook deployed to instead of a hard-coded one.
NAMESPACE="$1"
MODEL_NAME=flower-sample
# The leading "@" makes curl read the request body from the file ./input.json.
INPUT_PATH=@./input.json
INGRESS_GATEWAY=istio-ingressgateway

# Host IP of the first ingress gateway pod and node port of the service's http2 port.
HOST_IP=$(kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}')
NODE_PORT=$(kubectl -n istio-system get service "${INGRESS_GATEWAY}" -o jsonpath="{.spec.ports[?(@.name=='http2')].nodePort}")

# Take the host part of the service's status URL (third "/"-separated field);
# it is sent as the Host header of the request.
SERVICE_HOSTNAME=$(kubectl -n "${NAMESPACE}" get inferenceservice "${MODEL_NAME}" -o jsonpath='{.status.url}' | cut -d "/" -f 3)

curl -v -H "Host: ${SERVICE_HOSTNAME}" "http://${HOST_IP}:${NODE_PORT}/v1/models/${MODEL_NAME}:predict" -d "${INPUT_PATH}"


## Add Canary to InferenceService

First define the canary endpoint spec, then roll out 10% of the traffic to the canary version and watch the rollout progress.

In [22]:
# Resource requests and limits for the canary are set to the same CPU and memory figures.
canary_resources = V1ResourceRequirements(
    requests={'cpu': '100m', 'memory': '0.5Gi'},
    limits={'cpu': '100m', 'memory': '0.5Gi'},
)

# TensorFlow predictor that loads the "flowers-2" model from its storage URI.
canary_tensorflow = V1alpha2TensorflowSpec(
    storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
    resources=canary_resources,
)

canary_endpoint_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(tensorflow=canary_tensorflow),
)

# Roll out the canary at 10 percent with watch mode enabled and a 120-second timeout.
KFServing.rollout_canary(
    'flower-sample',
    canary=canary_endpoint_spec,
    percent=10,
    namespace=namespace,
    watch=True,
    timeout_seconds=120,
)

NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
flower-sample        False                                                                                        
flower-sample        False                                                                                        
flower-sample        True       90              10              http://flower-sample.anonymous.example.com/v1/m...


In [24]:
!kubectl get inferenceservices -n $namespace

NAME            URL                                                                  READY   DEFAULT TRAFFIC   CANARY TRAFFIC   AGE
flower-sample   http://flower-sample.anonymous.example.com/v1/models/flower-sample   True    90                10               33m


## Rollout more traffic to canary of the InferenceService

Roll out 50% of the traffic to the canary version.

In [None]:
# Change the canary percentage of 'flower-sample' to 50; no canary spec is
# passed here, only the percentage. Watch mode is on with a 120-second timeout.
KFServing.rollout_canary(
    'flower-sample',
    percent=50,
    namespace=namespace,
    watch=True,
    timeout_seconds=120,
)

## Promote Canary to Default

In [None]:
# Promote the canary of 'flower-sample', with watch mode enabled and a
# 120-second timeout.
KFServing.promote(
    'flower-sample',
    namespace=namespace,
    watch=True,
    timeout_seconds=120,
)

## Delete the InferenceService

In [12]:
# Delete the 'flower-sample' InferenceService from the target namespace; the
# call's return value is rendered as the cell output.
KFServing.delete(
    'flower-sample',
    namespace=namespace,
)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'flower-sample',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': '5830302c-75d1-11ea-bf9e-0242ac110020'}}