# Inference Service

* Create an inference service to serve one of the registered models
* Fetch online features and calculate predictions from the inference service

In [1]:
# !pip install --upgrade pip
# !cat requirements.`txt
# !pip install -q -r requirements.txt
# !pip uninstall -r requirements.txt --yes
# !pip install --no-deps feast==0.36.0
!pip install --no-deps --ignore-requires-python "https://github.com/opendatahub-io/ml-metadata/releases/download/v1.14.0%2Bremote.1/ml_metadata-1.14.0+remote.1-py3-none-any.whl" # need a Python 3.11 compatible version
!pip install --no-deps --ignore-requires-python "model-registry==0.1.2" # ignore dependencies because of the above override
!pip uninstall ray --yes
!pip install "ray[serve]<=2.9.3,>=2.9.2"
!pip install kserve[storage]>=0.12.0
!pip list | grep ray

Collecting ml-metadata==1.14.0+remote.1
  Downloading https://github.com/opendatahub-io/ml-metadata/releases/download/v1.14.0%2Bremote.1/ml_metadata-1.14.0+remote.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.4/78.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ml-metadata
Successfully installed ml-metadata-1.14.0+remote.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting model-registry==0.1.2
  Downloading model_registry-0.1.2-py3-none-any.whl (16 kB)
Installing collected packages: model-registry
Successfully installed model-registry-0.1.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnot

## Imports and constants

In [40]:
import os

import boto3
import matplotlib.pyplot as plt
# import onnx
# import onnxruntime as ort
# from feast import FeatureStore
from kubernetes import client 
from kserve import KServeClient
from kserve import constants
from kserve import utils
from kserve import V1beta1InferenceService
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec
from kserve import V1beta1SKLearnSpec
from kserve import V1beta1ModelSpec
from kserve import V1beta1ModelFormat
from kserve import V1beta1StorageSpec
from model_registry import ModelRegistry
from model_registry.types import ContextState
from ray.serve.handle import RayServeHandle
from urllib.parse import urlparse

In [3]:
# S3 (MinIO) connection settings; they are read back later when the
# data-connection Secret manifest is rendered.
# NOTE(review): these look like demo MinIO defaults - confirm, and avoid
# committing real credentials in a notebook.
os.environ.update({
    'accesskey': 'minio',
    'secretkey': 'minio123',
    'AWS_S3_ENDPOINT': 'http://minio-service.feast.svc.cluster.local:9000',
    'AWS_S3_BUCKET': 'feast',
    'AWS_DEFAULT_REGION': 'default',
})

In [4]:
# Export MODEL_NAMESPACE into the kernel's environment.
# NOTE(review): no later cell visible in this notebook reads MODEL_NAMESPACE - confirm it is still needed.
%env MODEL_NAMESPACE=feast-runbook

env: MODEL_NAMESPACE=feast-runbook


## Shared functions

In [5]:
def modelRegistry(
    server_address="modelregistry-sample.feast.svc.cluster.local",
    port=9090,
    author="feast-dev@redhat.com",
):
    """Create a Model Registry client.

    Called without arguments it connects to the same registry as before;
    the parameters allow another registry or author to be targeted without
    editing this function.

    Args:
        server_address: Host name of the Model Registry service.
        port: Port of the Model Registry service.
        author: Author name passed to the client.

    Returns:
        A `ModelRegistry` client instance.
    """
    return ModelRegistry(server_address=server_address, port=port, author=author)

In [6]:
registry = modelRegistry()

# List every registered model with its versions so a selection can be made.
# NOTE(review): `_api` is a private attribute of the registry client - confirm
# there is no public equivalent in the pinned model-registry version.
models = registry._api.get_registered_models()
if not models:
    raise RuntimeError("No registered models found in the model registry")

for model in models:
    print(f"Model {model.name}")
    model_versions = registry._api.get_model_versions(model.id)
    for model_version in model_versions:
        print(f"Version {model_version.name}")

# Update the logic to select the model and the version
selected_model = models[0]

# Only versions that are not archived are candidates for serving.
live_model_versions = [m for m in registry._api.get_model_versions(selected_model.id) if m.state!=ContextState.ARCHIVED]
if not live_model_versions:
    raise RuntimeError(f"Model {selected_model.name} has no non-archived versions")
selected_model_version = live_model_versions[0]

# Fail with a clear message instead of a bare IndexError when the version
# has no artifact attached.
model_artifacts = registry._api.get_model_artifacts(selected_model_version.id)
if not model_artifacts:
    raise RuntimeError(f"Model version {selected_model_version.name} has no model artifacts")
selected_model_artifact = model_artifacts[0]

print(f"Creating inference service for model: {selected_model}")
print(f"Model version: {selected_model_version}")
print(f"Model artifact: {selected_model_artifact}")

INFO:absl:MetadataStore with gRPC connection initialized


Model mnist
Version v.simple_NN.20240419150600
Version v.convolutedNN.202404191508261
Creating inference service for model: RegisteredModel(id='1', description='', external_id='', create_time_since_epoch=1713539173108, last_update_time_since_epoch=1713539173108, state=<ContextState.LIVE: 'LIVE'>, name='mnist')
Model version: ModelVersion(name='v.simple_NN.20240419150600', id='2', description='demo20231121 e2e MNIST', external_id='', create_time_since_epoch=1713539173809, last_update_time_since_epoch=1713539173809, state=<ContextState.LIVE: 'LIVE'>, model_name='mnist', version='v.simple_NN.20240419150600', author='feast-dev@redhat.com', metadata={'AWS_S3_BUCKET': 'feast', 'AWS_S3_ENDPOINT': 'http://minio-service.feast.svc.cluster.local:9000', 'AWS_DEFAULT_REGION': 'default'}, artifacts=NOTHING, _registered_model_id=NOTHING)
Model artifact: ModelArtifact(id='1', description='', external_id='', create_time_since_epoch=1713539174615, last_update_time_since_epoch=1713539174615, name='mnist'

Change this variable to use a different `Data Science Project`

In [8]:
# PROJECT is the namespace passed to the `oc` commands in this notebook and
# read through os.environ['PROJECT'] when the InferenceService is built.
%env PROJECT=feast
# List the inference services that currently exist in that namespace.
!oc get inferenceservices -n $PROJECT 

env: PROJECT=feast
No resources found in feast namespace.


In [9]:
# !oc describe inferenceservices mnist-simplenn -n $WORKSPACE

In [10]:
# Base URL used by the curl check in the next cell.
# NOTE(review): in the recorded run this host name could not be resolved.
%env MODEL_URL=http://modelmesh-serving.feast:8008

env: MODEL_URL=http://modelmesh-serving.feast:8008


In [11]:
# GET /v1/models/mnist-simplenn on MODEL_URL, requesting a JSON response.
!curl -X GET -H 'Accept: application/json' "${MODEL_URL}/v1/models/mnist-simplenn"

curl: (6) Could not resolve host: modelmesh-serving.feast


In [12]:
# Same port as MODEL_URL, but using the fully qualified cluster-local service
# name in the `feast-runbook` namespace.
%env MODEL=http://modelmesh-serving.feast-runbook.svc.cluster.local:8008

env: MODEL=http://modelmesh-serving.feast-runbook.svc.cluster.local:8008


In [13]:
# Print MODEL to confirm the value that shell commands will see.
!echo $MODEL

http://modelmesh-serving.feast-runbook.svc.cluster.local:8008


In [14]:
# Verbose GET of the /v2 path on MODEL, requesting a JSON response.
# NOTE(review): the recorded run failed with "Could not resolve host".
!curl -v -H 'Accept: application/json' $MODEL/v2

E0419 15:40:49.255177264     316 backup_poller.cc:127]                 Run client channel backup poller: UNKNOWN:pollset_work {created_time:"2024-04-19T15:40:49.254911013+00:00", children:[UNKNOWN:Bad file descriptor {syscall:"epoll_wait", os_error:"Bad file descriptor", errno:9, created_time:"2024-04-19T15:40:49.254827636+00:00"}]}
* Could not resolve host: modelmesh-serving.feast-runbook.svc.cluster.local
* Closing connection 0
curl: (6) Could not resolve host: modelmesh-serving.feast-runbook.svc.cluster.local


## Create the Inference Service

**Prerequisites**
* Create a `Model Server` called `mnist` under the current workspace `feast`

Create a Data Connection to the S3 bucket

In [15]:
# Render the Secret that holds the S3 data connection for the selected model.
# The namespace follows PROJECT (falling back to `feast`) so the Secret is
# created in the same namespace as the InferenceService, which also reads
# os.environ['PROJECT'].
secret_namespace = os.environ.get('PROJECT', 'feast')

connection_secret = f'''
kind: Secret
apiVersion: v1
metadata:
  name: {selected_model.name}-s3
  namespace: {secret_namespace}
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/managed: 'true'
  annotations:
    opendatahub.io/connection-type: s3
    openshift.io/display-name: {selected_model.name}-s3
stringData:
  AWS_ACCESS_KEY_ID: {os.environ['accesskey']}
  AWS_DEFAULT_REGION: {os.environ['AWS_DEFAULT_REGION']}
  AWS_S3_BUCKET: {os.environ['AWS_S3_BUCKET']}
  AWS_S3_ENDPOINT: {os.environ['AWS_S3_ENDPOINT']}
  AWS_SECRET_ACCESS_KEY: {os.environ['secretkey']}
type: Opaque
'''

# NOTE(review): the manifest, including the secret key, is written in plain
# text to the working directory - consider removing the file once applied.
with open("connection_secret.yaml", 'w') as file:
    file.write(connection_secret)

In [16]:
!oc delete -f connection_secret.yaml
!oc create -f connection_secret.yaml

secret "mnist-s3" deleted
secret/mnist-s3 created


Create the `InferenceService` (see [Create the InferenceService](https://kserve.github.io/website/master/modelserving/v1beta1/onnx/#create-the-inferenceservice))

In [41]:
# Identity and API coordinates of the InferenceService.
name = selected_model.name
kserve_version = 'v1beta1'
api_version = f"{constants.KSERVE_GROUP}/{kserve_version}"
namespace = os.environ['PROJECT']

# Location and format of the model, taken from the registry artifact.
storage_uri = selected_model_artifact.uri
# s3://feast/v.simple_NN.20240419150600/simple_NN.onnx?endpoint=http://minio-service.feast.svc.cluster.local:9000&defaultRegion=default
# Only the URI's path component is kept, without its leading slash.
storage_path = urlparse(storage_uri).path.lstrip('/')
model_format_name = selected_model_artifact.model_format_name
model_format_version = selected_model_artifact.model_format_version

# Labels tie the service back to the registry entries it was built from.
isvc_labels = {
    'modelregistry/registered-model-id': selected_model.id,
    'modelregistry/model-version-id': selected_model_version.id,
    'opendatahub.io/dashboard': 'true',
}

# The storage key uses the same `<model name>-s3` name as the data-connection Secret.
model_spec = V1beta1ModelSpec(
    storage=V1beta1StorageSpec(key=f'{name}-s3', path=storage_path),
    model_format=V1beta1ModelFormat(name=model_format_name, version=model_format_version),
    runtime="mnist",
    # see https://kserve.github.io/website/master/modelserving/v1beta1/onnx/#create-the-inferenceservice
    protocol_version='v2',
)

isvc = V1beta1InferenceService(
    api_version=api_version,
    kind=constants.KSERVE_KIND,
    metadata=client.V1ObjectMeta(name=name, namespace=namespace, labels=isvc_labels),
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(model=model_spec),
    ),
)
print(f"Creating InferenceService {isvc}")

Creating InferenceService {'api_version': 'serving.kserve.io/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'annotations': None,
              'creation_timestamp': None,
              'deletion_grace_period_seconds': None,
              'deletion_timestamp': None,
              'finalizers': None,
              'generate_name': None,
              'generation': None,
              'labels': {'modelregistry/model-version-id': '2',
                         'modelregistry/registered-model-id': '1',
                         'opendatahub.io/dashboard': 'true'},
              'managed_fields': None,
              'name': 'mnist',
              'namespace': 'feast',
              'owner_references': None,
              'resource_version': None,
              'self_link': None,
              'uid': None},
 'spec': {'explainer': None,
          'predictor': {'active_deadline_seconds': None,
                        'affinity': None,
                        'annotations': None,
           

In [42]:
!oc get inferenceservices -n $PROJECT
!oc delete inferenceservice mnist -n $PROJECT

NAME    URL   READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION   AGE
mnist                                                                               14m
inferenceservice.serving.kserve.io "mnist" deleted


In [43]:
# Instantiate the KServe client with its default settings and submit the
# InferenceService object; the value returned by `create` is the cell output.
KServe = KServeClient()
KServe.create(isvc)

{'apiVersion': 'serving.kserve.io/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2024-04-19T15:59:48Z',
  'generation': 1,
  'labels': {'modelregistry/model-version-id': '2',
   'modelregistry/registered-model-id': '1',
   'opendatahub.io/dashboard': 'true'},
  'managedFields': [{'apiVersion': 'serving.kserve.io/v1beta1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:metadata': {'f:labels': {'.': {},
       'f:modelregistry/model-version-id': {},
       'f:modelregistry/registered-model-id': {},
       'f:opendatahub.io/dashboard': {}}},
     'f:spec': {'.': {},
      'f:predictor': {'.': {},
       'f:model': {'.': {},
        'f:modelFormat': {'.': {}, 'f:name': {}, 'f:version': {}},
        'f:name': {},
        'f:protocolVersion': {},
        'f:runtime': {},
        'f:storage': {'.': {}, 'f:key': {}, 'f:path': {}}}}}},
    'manager': 'OpenAPI-Generator',
    'operation': 'Update',
    'time': '2024-04-19T15:59:48Z'}],
  'name': 'mnist',
  'namespa

In [44]:
# List the inference services again to check the newly created one is present.
!oc get inferenceservices -n $PROJECT

NAME    URL   READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION   AGE
mnist                                                                               4s


In [None]:
# Optional teardown: the original cell held an unfinished `KServe.de`, which
# raises AttributeError when executed. Uncomment the line below to delete the
# InferenceService; it is left disabled so Run All keeps the service.
# KServe.delete(name, namespace=namespace)

In [None]:
"""
spec:
  predictor:
    model:
      modelFormat:
        name: onnx
        version: '1'
      runtime: mnist
      storage:
        key: aws-connection-mnistsimplenn
        path: v.simple_NN.20240419101713/simple_NN.onnx
"""