# Inference Service

* Create an inference service to serve one of the registered models
* Fetch online features and calculate predictions from the inference service

In [46]:
# !pip install --upgrade pip
# !cat requirements.`txt
# !pip install -q -r requirements.txt
# !pip uninstall -r requirements.txt --yes
# !pip install --no-deps feast==0.36.0
!pip install --no-deps --ignore-requires-python "https://github.com/opendatahub-io/ml-metadata/releases/download/v1.14.0%2Bremote.1/ml_metadata-1.14.0+remote.1-py3-none-any.whl" # need a Python 3.11 compatible version
!pip install --no-deps --ignore-requires-python "model-registry==0.1.2" # ignore dependencies because of the above override
!pip uninstall ray --yes
!pip install "ray[serve]<=2.9.3,>=2.9.2"
!pip install kserve[storage]>=0.12.0
!pip list | grep ray

Collecting ml-metadata==1.14.0+remote.1
  Downloading https://github.com/opendatahub-io/ml-metadata/releases/download/v1.14.0%2Bremote.1/ml_metadata-1.14.0+remote.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.4/78.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
E0419 14:32:46.722302831   15001 backup_poller.cc:127]                 Run client channel backup poller: UNKNOWN:pollset_work {created_time:"2024-04-19T14:32:46.722108608+00:00", children:[UNKNOWN:Bad file descriptor {syscall:"epoll_wait", os_error:"Bad file descriptor", errno:9, created_time:"2024-04-19T14:32:46.72204918+00:00"}]}
Found existing installation: ray 2.9.3
Uninstalling ray-2.9.3:
  Successfully uninstalled ray-2.9.3
Collecting ray<=2.9.3,>=2.9.2 (from ray[serve]<=2.9.3,>=2.9.2)
  Downloading ray-2.9.3-cp39-cp39-manylinux2014_x86_64.whl.metadata (13 kB)
Collecting fastapi<=0.108.0 (from ray[serve]<=2.9.3,>=2.9.2)
  Downloading fastapi-0.108.0-py3-none-any.whl.metadata 

## Imports and constants

In [47]:
import os

import boto3
import matplotlib.pyplot as plt
# import onnx
# import onnxruntime as ort
# from feast import FeatureStore
from kubernetes import client 
from kserve import KServeClient
from kserve import constants
from kserve import utils
from kserve import V1beta1InferenceService
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec
from kserve import V1beta1SKLearnSpec
from kserve import V1beta1ModelSpec
from kserve import V1beta1ModelFormat
from kserve import V1alpha1ServingRuntime

from model_registry import ModelRegistry
from model_registry.types import ContextState
from ray.serve.handle import RayServeHandle

In [48]:
# Connection settings for the object store reachable at AWS_S3_ENDPOINT, exported as
# process environment variables in a single update.
# NOTE(review): the access/secret keys are hard-coded here; consider injecting them from
# the environment or a secret instead of committing them with the notebook.
os.environ.update({
    'accesskey': 'minio',
    'secretkey': 'minio123',
    'AWS_S3_ENDPOINT': 'http://minio-service.feast.svc.cluster.local:9000',
    'AWS_S3_BUCKET': 'feast',
    'AWS_DEFAULT_REGION': 'default',
})

In [49]:
# Namespace for the InferenceService; read back later through os.environ['MODEL_NAMESPACE'].
%env MODEL_NAMESPACE=feast-runbook

env: MODEL_NAMESPACE=feast-runbook


## Shared functions

In [50]:
def modelRegistry(server_address="modelregistry-sample.feast-runbook.svc.cluster.local",
                  port=9090,
                  author="feast-dev@redhat.com"):
    """Build a ModelRegistry client.

    The defaults reproduce the connection previously hard-coded in this notebook, so
    calling ``modelRegistry()`` with no arguments behaves exactly as before.

    Args:
        server_address: Host name of the model registry service.
        port: Port passed to the ModelRegistry client.
        author: Author identifier passed to the ModelRegistry client.

    Returns:
        A ``ModelRegistry`` instance configured with the given connection settings.
    """
    return ModelRegistry(server_address=server_address, port=port, author=author)

In [51]:
# List every registered model with its versions, then pick the model, version and
# artifact the inference service will be created from.
registry = modelRegistry()

# NOTE(review): `_api` is a private attribute of the ModelRegistry client; it is used
# here because the listing calls below are made through it.
models = registry._api.get_registered_models()
if not models:
    raise RuntimeError("No registered models found in the model registry")

# Keep the versions fetched while listing so the selected model is not queried twice.
versions_by_model_id = {}
for model in models:
    print(f"Model {model.name}")
    model_versions = registry._api.get_model_versions(model.id)
    versions_by_model_id[model.id] = model_versions
    for model_version in model_versions:
        print(f"Version {model_version.name}")

# Update the logic to select the model and the version
selected_model = models[0]
live_model_versions = [
    version
    for version in versions_by_model_id[selected_model.id]
    if version.state != ContextState.ARCHIVED
]
if not live_model_versions:
    raise RuntimeError(f"Model {selected_model.name} has no non-archived versions")
selected_model_version = live_model_versions[0]

model_artifacts = registry._api.get_model_artifacts(selected_model_version.id)
if not model_artifacts:
    raise RuntimeError(f"Model version {selected_model_version.name} has no artifacts")
selected_model_artifact = model_artifacts[0]

print(f"Creating inference service for model: {selected_model}")
print(f"Model version: {selected_model_version}")
print(f"Model artifact: {selected_model_artifact}")

INFO:absl:MetadataStore with gRPC connection initialized


StoreException: Context type kf.RegisteredModel does not exist

In [33]:
# Set the model registry base URL, then GET its serving_environments endpoint as JSON.
%env MR_URL=http://modelregistry-sample.feast-runbook.svc.cluster.local:8080
!curl -X GET -H 'Accept: application/json' "${MR_URL}/api/model_registry/v1alpha3/serving_environments"

env: MR_URL=http://modelregistry-sample.feast-runbook.svc.cluster.local:8080
{"items":[],"nextPageToken":"","pageSize":0,"size":0}


In [34]:
# List the InferenceService resources in the WORKSPACE namespace.
%env WORKSPACE=feast-runbook
!oc get inferenceservices -n $WORKSPACE 

env: WORKSPACE=feast-runbook
No resources found in feast-runbook namespace.


In [9]:
# !oc describe inferenceservices mnist-simplenn -n $WORKSPACE

In [26]:
# Base URL of the modelmesh-serving service, using the short `<service>.<namespace>` host name.
%env MODEL_URL=http://modelmesh-serving.feast-runbook:8008

env: MODEL_URL=http://modelmesh-serving.feast-runbook:8008


In [27]:
# GET the mnist-simplenn model through the /v1/models path under MODEL_URL.
!curl -X GET -H 'Accept: application/json' "${MODEL_URL}/v1/models/mnist-simplenn"

curl: (6) Could not resolve host: modelmesh-serving.feast-runbook


In [28]:
# Same service and port as MODEL_URL, but with the fully-qualified `.svc.cluster.local` host name.
%env MODEL=http://modelmesh-serving.feast-runbook.svc.cluster.local:8008

env: MODEL=http://modelmesh-serving.feast-runbook.svc.cluster.local:8008


In [13]:
# Print the value of MODEL as seen by the shell.
!echo $MODEL

http://modelmesh-serving.feast-runbook.svc.cluster.local:8008


In [14]:
# Verbose request against the /v2 path under MODEL.
!curl -v -H 'Accept: application/json' $MODEL/v2

* Could not resolve host: modelmesh-serving.feast-runbook.svc.cluster.local
* Closing connection 0
curl: (6) Could not resolve host: modelmesh-serving.feast-runbook.svc.cluster.local


## Create the inference service

**Prerequisites**
* Create a `Model Server` called `mnist` under the current workspace `feast-runbook`

Create the `InferenceService` (see [Create the InferenceService](https://kserve.github.io/website/master/modelserving/v1beta1/onnx/#create-the-inferenceservice))

In [35]:
# Inputs for the InferenceService, taken from the model, version and artifact selected
# from the registry and from the MODEL_NAMESPACE environment variable.
name = selected_model.name
kserve_version = 'v1beta1'
api_version = constants.KSERVE_GROUP + '/' + kserve_version
namespace = os.environ['MODEL_NAMESPACE']
storage_uri = selected_model_artifact.uri
model_format_name = selected_model_artifact.model_format_name
model_format_version = selected_model_artifact.model_format_version

# storageUri formatted like model-registry://{registeredModelName}/{versionName}
# NOTE(review): storage_uri above is the artifact's `uri` taken verbatim; confirm that it
# follows this format. (This line was bare prose in the code cell, which is a SyntaxError.)

isvc = V1beta1InferenceService(
    api_version=api_version,
    kind=constants.KSERVE_KIND,
    metadata=client.V1ObjectMeta(
        name=name,
        namespace=namespace,
        # Labels carrying the registry ids of the model and version being served.
        labels={
            'modelregistry/registered-model-id': selected_model.id,
            'modelregistry/model-version-id': selected_model_version.id,
        },
    ),
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            model=V1beta1ModelSpec(
                storage_uri=storage_uri,
                model_format=V1beta1ModelFormat(name=model_format_name, version=model_format_version),
                runtime="mnist",
                # see https://kserve.github.io/website/master/modelserving/v1beta1/onnx/#create-the-inferenceservice
                protocol_version='v2',
            )
        )
    ),
)
print(f"Creating InferenceService {isvc}")

Creating InferenceService {'api_version': 'serving.kserve.io/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'annotations': None,
              'creation_timestamp': None,
              'deletion_grace_period_seconds': None,
              'deletion_timestamp': None,
              'finalizers': None,
              'generate_name': None,
              'generation': None,
              'labels': {'modelregistry/model-version-id': '12',
                         'modelregistry/registered-model-id': '1'},
              'managed_fields': None,
              'name': 'mnist',
              'namespace': 'feast-runbook',
              'owner_references': None,
              'resource_version': None,
              'self_link': None,
              'uid': None},
 'spec': {'explainer': None,
          'predictor': {'active_deadline_seconds': None,
                        'affinity': None,
                        'annotations': None,
                        'automount_service_account_token': None

In [36]:
# Instantiate the KServe client and submit the InferenceService built above.
# The value returned by create() is the cell's last expression, so it is displayed as the cell output.
# NOTE(review): re-running this cell presumably fails once the resource exists — confirm.
KServe = KServeClient()
KServe.create(isvc)

{'apiVersion': 'serving.kserve.io/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2024-04-19T14:03:11Z',
  'generation': 1,
  'labels': {'modelregistry/model-version-id': '12',
   'modelregistry/registered-model-id': '1'},
  'managedFields': [{'apiVersion': 'serving.kserve.io/v1beta1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:metadata': {'f:labels': {'.': {},
       'f:modelregistry/model-version-id': {},
       'f:modelregistry/registered-model-id': {}}},
     'f:spec': {'.': {},
      'f:predictor': {'.': {},
       'f:model': {'.': {},
        'f:modelFormat': {'.': {}, 'f:name': {}, 'f:version': {}},
        'f:name': {},
        'f:protocolVersion': {},
        'f:runtime': {},
        'f:storageUri': {}}}}},
    'manager': 'OpenAPI-Generator',
    'operation': 'Update',
    'time': '2024-04-19T14:03:11Z'}],
  'name': 'mnist',
  'namespace': 'feast-runbook',
  'resourceVersion': '2001438',
  'uid': 'ffa12d39-6faf-469e-a0bc-a01da902aacd'},
 'spec

In [None]:
# YAML manifest of the Secret carrying the S3 connection settings for the model storage.
# Values are interpolated from the environment variables set earlier in this notebook.
# The namespace comes from MODEL_NAMESPACE so the Secret lands in the same namespace as
# the InferenceService instead of a hard-coded one.
# NOTE(review): values are interpolated without YAML quoting; credentials containing
# YAML-special characters would need quoting.
connection_secret = f'''
kind: Secret
apiVersion: v1
metadata:
  name: aws-connection-mnist-simplenn
  namespace: {os.environ['MODEL_NAMESPACE']}
  annotations:
    opendatahub.io/connection-type: s3
    openshift.io/display-name: mnist-simpleNN
stringData:
  AWS_ACCESS_KEY_ID: {os.environ['accesskey']}
  AWS_DEFAULT_REGION: {os.environ['AWS_DEFAULT_REGION']}
  AWS_S3_BUCKET: {os.environ['AWS_S3_BUCKET']}
  AWS_S3_ENDPOINT: {os.environ['AWS_S3_ENDPOINT']}
  AWS_SECRET_ACCESS_KEY: {os.environ['secretkey']}
type: Opaque
'''

# with open("connection_secret.yaml", 'w') as file:
#     file.write(connection_secret)


In [None]:
# Reference fragment of an InferenceService spec that uses `storage.key`/`storage.path`.
# It is a bare string expression: nothing consumes it, it is only displayed as the cell output.
# NOTE(review): the storage key here is `aws-connection-mnistsimplenn`, while the Secret
# manifest in this notebook is named `aws-connection-mnist-simplenn` — confirm which is intended.
"""
spec:
  predictor:
    model:
      modelFormat:
        name: onnx
        version: '1'
      runtime: mnist
      storage:
        key: aws-connection-mnistsimplenn
        path: v.simple_NN.20240419101713/simple_NN.onnx
"""