# Model serving using Kserve

In the previous labs you learned how to train a model and export it to storage. Now we will use this model for simple online predictions.

In [1]:
## import dependencies
import kfp
from kfp import components
from kfp import dsl
from kserve import KServeClient
from kserve import constants
from kserve import utils
from kserve import V1beta1InferenceService
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec
from kserve import V1beta1SKLearnSpec
from kserve import V1beta1XGBoostSpec
from kubernetes import client


## 4.1 Deploy the model

### 4.1.1 Define model serving Metadata

In [2]:
# Target namespace, as resolved by kserve's get_default_target_namespace() helper.
namespace = utils.get_default_target_namespace()
# TODO (exercise): set the model/service name. Due to technical limitations on
# service name length, consider using just a trigram.
name = ...
# TODO (exercise): set the bucket that appears in the model's s3:// URI below.
bucket=...
# Path to the model used to launch the service.
storage_uri=f"s3://{bucket}/models/frompipeline/xgboost/chicago"

### 4.1.2 Create a secret and a service account so Kserve can use model in MinIO

Check the `./resources/s3_secret.yaml` file: it contains two resource definitions and a link to MinIO.

In [None]:

# Apply the manifest that defines the secret and the service account KServe uses to read the model from MinIO.
!kubectl apply -f ./resources/s3_secret.yaml

### 4.1.3 Create the inference service specification using

- The model name and namespace
- The storage URI
- The Protocol version
- The service account created in ./resources/s3_secret.yaml
- An "image pull secret" reference so kserve can pull images from container registry (predictor and others...)


In [4]:
# InferenceService manifest: an XGBoost predictor that loads the model from
# `storage_uri` and is served with protocol version "v2".
chicago_isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind=constants.KSERVE_KIND,
    metadata=client.V1ObjectMeta(name=name, namespace=namespace),
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            # NOTE(review): the recorded output of the create call shows
            # serviceAccountName 's3-kserve-sa' -- confirm which name
            # ./resources/s3_secret.yaml actually defines.
            service_account_name="kserve-minio-sa",
            # Secret used to pull the predictor (and other) images from the container registry.
            image_pull_secrets=[{"name": "registry-secret"}],
            xgboost=V1beta1XGBoostSpec(
                storage_uri=storage_uri,
                protocol_version="v2",
            ),
        ),
    ),
)

### 4.1.4 launch this service

In [5]:
# Create a KServe client and submit the InferenceService specification.
# create() returns the created resource, which the notebook displays as the cell output.
KServe = KServeClient()
KServe.create(chicago_isvc)

{'apiVersion': 'serving.kserve.io/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2022-12-16T15:52:07Z',
  'generation': 1,
  'managedFields': [{'apiVersion': 'serving.kserve.io/v1beta1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:spec': {'.': {},
      'f:predictor': {'.': {},
       'f:imagePullSecrets': {},
       'f:serviceAccountName': {},
       'f:xgboost': {'.': {},
        'f:name': {},
        'f:protocolVersion': {},
        'f:storageUri': {}}}}},
    'manager': 'OpenAPI-Generator',
    'operation': 'Update',
    'time': '2022-12-16T15:52:04Z'}],
  'name': 'xgb',
  'namespace': 'kubeflow-user-guillaume-etevenard',
  'resourceVersion': '6794786934',
  'uid': 'ebf2f2b7-2d35-4acc-a04f-2b3545324b00'},
 'spec': {'predictor': {'imagePullSecrets': [{'name': 'registry-secret'}],
   'serviceAccountName': 's3-kserve-sa',
   'xgboost': {'name': 'kserve-container',
    'protocolVersion': 'v2',
    'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},


### 4.1.5 get the inference service status over time

In [6]:
# Watch the InferenceService (watch=True) for at most 120 seconds and print its
# status rows; the service is usable once the READY column shows True.
KServe.get(name, namespace=namespace, watch=True, timeout_seconds=120)

NAME                 READY                           PREV                    LATEST URL                                                              
xgb                  Unknown                            0                       100                                                                  
xgb                  Unknown                            0                       100                                                                  
xgb                  Unknown                            0                       100                                                                  
xgb                  Unknown                            0                       100                                                                  
xgb                  True                               0                       100 http://xgb-kubeflow-user-guillaume-etevenard.kubeflow.aiengine...


In [7]:
import requests

# Using the KServe API, fetch the InferenceService and read the URL it
# reports under status.address.url.
# NOTE(review): these keys are presumably only populated once the service is
# ready -- run this after the watch above reports READY = True.
isvc_resp = KServe.get(name, namespace=namespace)
isvc_url = isvc_resp['status']['address']['url']

In [None]:
### Display the URL to see how to query the model
isvc_url

In [9]:
### Test the API: try a GET on the /v2/models/<model name> route.
### A timeout is set so the cell fails fast instead of hanging forever
### if the service is not reachable.
requests.get(
    f'http://{name}.{namespace}.svc.cluster.local/v2/models/{name}',
    timeout=30,
).text

'{"name":"xgb","versions":[],"platform":"","inputs":[],"outputs":[]}'

#### 4.1.6 Consider that the model is deployed IF : 

- `KServe.get(..., namespace=..., watch=True, timeout_seconds=120)` returns READY = True
- Interface shows all green checks

![serving_summary](./images/serving_summary.png)

![serving_details](./images/serving_details.png)

## 4.2 Query the model

### 4.2.1 Get sample data from the original dataset

In [None]:
# Two sample rows taken from the original dataset, ten numeric features each,
# in the column order the model expects.
data_list = [
    [746.0, 3.34, 6.0, 41.9442266, -87.65599818, 16.0, 11.75, 0.0, 0.0, 11.75],
    [681.0, 3.0, 8.0, 41.89960211, -87.63330804, 24.0, 11.25, 0.0, 0.0, 11.25],
]

### 4.2.2 Create the object for query 

In [10]:

# Request body for the V2 inference route: a single input tensor named
# "input-0" carrying the sample rows.
# The shape is derived from data_list ([number of rows, number of features])
# so it stays correct when the sample data is changed.
inference_input = {
  "inputs": [
    {
      "name": "input-0",
      "shape": [len(data_list), len(data_list[0]) if data_list else 0],
      "datatype": "FP32",
      "data": data_list
    }
  ]
}

{"model_name":"xgb","model_version":null,"id":"9976bc81-d3b1-42a8-9386-89fcb747ad67","parameters":null,"outputs":[{"name":"predict","shape":[2],"datatype":"FP32","parameters":null,"data":[-0.018072307109832764,-0.027862250804901123]}]}


### 4.2.3 Query the model

In [None]:
### The service is deployed with protocol version "v2", so predictions are
### served on the /v2/models/<model name>/infer route (same host as the
### metadata GET used earlier), not on the bare service URL.
infer_url = f'http://{name}.{namespace}.svc.cluster.local/v2/models/{name}/infer'
### A timeout avoids a cell that hangs forever if the service is unreachable.
response = requests.post(infer_url, json=inference_input, timeout=30)
### you should get a json object ending with "data:[prediction1,prediction2]"
print(response.text)

## Go further

We have seen how to create an inference service from this notebook. To go further, we could create it from a pipeline component.

In [None]:
### Load the KServe pipeline component: it is prebuilt and published on GitHub.
kserve_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kserve/component.yaml'
)


In [None]:
from kubernetes import client as k8s_client

# Pipeline-level configuration: register the registry secret as an image pull secret.
pipeline_conf = kfp.dsl.PipelineConf()
pipeline_conf.set_image_pull_secrets([k8s_client.V1ObjectReference(name='registry-secret')])


@dsl.pipeline(
  name='KServe pipeline',
  description='A pipeline for creating inference service from s3 model.'
)
def kservePipeline():
    """Pipeline with a single step that calls the prebuilt KServe component.

    The values below are exercise placeholders to fill in. They are local to
    the pipeline function, so they do not overwrite the notebook-level
    `namespace` variable.
    """
    action = ...
    model_name = ...
    model_uri = ...
    namespace = ...
    framework = ...

    # The step must be created inside the pipeline function to be part of the pipeline.
    kserve_task = kserve_op(
        action=action,
        model_name=model_name,
        model_uri=model_uri,
        namespace=namespace,
        framework=framework,
        ### how to add s3 SA ?
        ### how to set protocol version ?
    ).set_image_pull_policy('Always')


In [None]:
# TODO (exercise): placeholder to replace with the identifier of the submitted pipeline run.
# NOTE(review): presumably obtained by submitting kservePipeline with pipeline_conf -- confirm against the lab instructions.
run_id = ...