# Exercise 6: Serve a model

## What we will cover:
- Create a KServe container
- Register the model and move it to Production
- Prediction test to validate the results

### Set up the group parameter

In [None]:
# Set your group name (in quotes).
# It is used as a prefix for the MLflow experiment/model names and for the
# InferenceService name built in later cells.
group_name = ""

In [None]:
# Report whether a group name has been provided.
# An empty name only prints a reminder; execution is deliberately not stopped.
if group_name:
    print("Group name is set. Proceed to the next cell.")
else:
    print("Please set your group name before proceeding to the next cell.")

### Double Checking

In [None]:
# Second check of the group name: print a reminder if it is still empty.
# Nothing is printed when the name is set, and execution is never stopped here.
if not group_name:
    print("Please set your group name before proceeding to the next cell.")
    # You can also raise an exception to stop execution if needed
    # raise ValueError("Group name is empty. Please set a valid group name.")

# Prerequisites 
<div class="alert alert-block alert-danger">
<b>Important:</b> Make sure the parameters in the next cell (domain name, paths, and names) are valid for your environment.
</div>


In [None]:
# Set parameters

# adapt to your EZUA Domain name
EZAF_ENV = "pe1.ezmeral.de"
# type of demo (DemoContent - in this case fruit and vegetables)
DC = "fruit"
# path to end2end demo (not data)
end2end_path = '/mnt/datasources/datafabric/ezua/end2end/' 
# path to data for model training, etc.
path = '/mnt/datasources/datafabric/ezua/end2end-data/fruits/' 
# path to GROUP INDIVIDUAL data for model training, etc.
group_data_path = '/mnt/datasources/datafabric/ezua/end2end-group-data/' 
# experiment name prefix for mlflow
experiment_name = "end2end-retail-demo"
# group-specific experiment name: "<group_name>-<experiment_name>"
exp_name = group_name + "-" + experiment_name
# model name without the group prefix
model_name = "end2end-retail-demo"
# group-specific model name: "<group_name>-<model_name>"; this is the name
# looked up in the MLflow model registry further down
g_model_name = group_name + "-" + model_name
# artifact_path = "end2end-retail-demo"
artifact_path = "model"
# password for UA user login (needed to get keycloak token)
# NOTE(review): hardcoded credential committed in the notebook; it is not used
# by any cell visible here. Prefer an environment variable or getpass, and
# rotate this password if it is real.
UA_password = "Hpepoc@123"

### Import required libraries & refresh the token
- Ignore the warnings

In [None]:
import io
import json
import os

import mlflow
import numpy as np
import requests
import urllib3
from kserve import KServeClient
from kserve import V1beta1InferenceService
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import constants
from kserve import utils
from kubernetes import client
from kubernetes.client import V1EnvVar
from kubernetes.client.models import V1ObjectMeta
from PIL import Image
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
# Refresh the authentication token via the custom `update_token` line magic
# (presumably supplied by the notebook environment - confirm).
%update_token

## 1. Model Serving with KServe

### Get details from MLflow
- Connect to MLflow
- Get the latest version, as in the previous exercise; please `Fill in` the missing parts
- Get the model URI

In [None]:
# Create an instance of the MlflowClient.
# NOTE: this rebinds `client`, which shadows the `kubernetes.client` module
# imported at the top of the notebook.
client = mlflow.tracking.MlflowClient()

# Get the latest model version in Production
latest_versions = client.get_latest_versions(name=g_model_name, stages=["Production"])
if not latest_versions:
    # Fail with an actionable message instead of a bare IndexError below
    raise RuntimeError(
        f"No version of model '{g_model_name}' is in the Production stage. "
        "Register the model and move it to Production before serving it."
    )
latest_version = latest_versions[0]

# Get the model uri: swap only the LAST "model" in the source path for
# "tf_serving_model". Replacing every occurrence would corrupt the URI whenever
# "model" also appears earlier in the path (e.g. in a bucket or group name).
# If "model" does not occur at all, the source is left unchanged.
model_uri = "tf_serving_model".join(latest_version.source.rsplit("model", 1))

print("Model Storage Path in S3 (as shown in MLflow): " + str(model_uri))

### Create the Inference Service using YAML config file
- Set parameters
- Create a YAML file with the name set in the parameter 'yaml_name' (see below)
- The YAML contains the Kubernetes (k8s) objects "Secret", "ServiceAccount" and "InferenceService"
- Execute a kubectl command on the 'shell'

In [None]:
# Names of the InferenceService, the Kubernetes helper objects and the
# manifest file that the following cells create.
isvc_name = "-".join((group_name, experiment_name)) + "-isvc"
yaml_name = './s3-proxy-kserve.yaml'
sa_name = 's3-proxy-kserve-sa'
secret_name = 's3-proxy-kserve-secret'

In [None]:
# Create YAML configuration file
#
# The manifest is rendered completely BEFORE the file is opened. Opening with
# mode 'w' truncates the file, so rendering inside the `with` block would leave
# an empty manifest behind if AUTH_TOKEN (or any name used in the template) is
# missing.
auth_token = os.environ['AUTH_TOKEN']  # raises KeyError if the token is not set

# The manifest defines three Kubernetes objects: a Secret carrying the S3
# settings/credentials, a ServiceAccount, and the InferenceService itself.
text = f"""---
apiVersion: v1
kind: Secret
metadata:
  name: "{secret_name}"
  annotations:
    serving.kserve.io/s3-cabundle: ""
    serving.kserve.io/s3-endpoint: "local-s3-service.ezdata-system.svc.cluster.local:30000/"
    serving.kserve.io/s3-useanoncredential: "false"
    serving.kserve.io/s3-usehttps: "0"
    serving.kserve.io/s3-verifyssl: "0"
stringData:
  AWS_ACCESS_KEY_ID: "{auth_token}"
  AWS_SECRET_ACCESS_KEY: "s3"
type: Opaque

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: "{sa_name}"
secrets:
  - name: "access-token"

---
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: "{isvc_name}"
  annotations:
    "sidecar.istio.io/inject": "false"
spec:
  predictor:
    tensorflow:
      storageUri: "{model_uri}"
    serviceAccountName: "{sa_name}"
"""

# NOTE: the written file contains the auth token in clear text.
with open(yaml_name, 'w') as file:
    file.write(text)

In [None]:
# Apply the manifest written above: this submits the Secret, ServiceAccount
# and InferenceService defined in the YAML file to the cluster.
!kubectl apply -f {yaml_name}

In [None]:
# Wait until the ISvc is ready
kserve_client = KServeClient()
kserve_client.wait_isvc_ready(isvc_name, watch=True, timeout_seconds=120)

# With watch=True the call above streams status updates and returns once the
# watch ends; it does not appear to raise when the timeout expires first.
# Check the readiness explicitly so the message below is never misleading.
if kserve_client.is_isvc_ready(isvc_name):
    print(f"\nInferenceService {isvc_name} is ready.")
else:
    print(
        f"\nInferenceService {isvc_name} is NOT ready yet. "
        "Re-run this cell or inspect the InferenceService before continuing."
    )

## Check the results

### Head back to the menu and select `Model Serving`
![image.png](./images/exercise6/menu3.PNG)

### It will open Kubeflow
- Here you can see a list of the models being served

![image.png](./images/exercise6/Kserve1.PNG)

### Look into your model
- Click on your model
- Now you can see the model details including the serving endpoint, model URI and predictor used

![image.png](./images/exercise6/Kserve2.PNG)

## 2. Predictions

In [None]:
# Refresh the authentication token again before sending prediction requests
# (custom `update_token` line magic; presumably supplied by the notebook
# environment - confirm).
%update_token

### Set up parameters
- `Fill in` the URL using the `URL internal` value from Kubeflow
- We use the internal URL because the prediction request is made from within the Kubernetes cluster; otherwise we would use the external one.

In [None]:
# Internal URL of the InferenceService, as shown in Kubeflow
url = "Fill in"

# Assemble the predict endpoint for this InferenceService
serving_url = "".join((url, "/v1/models/", isvc_name, ":predict"))

print("Serving URL:", serving_url)

### Define the prediction functions
- The prediction is different when you are serving the model as you don't directly use an image but a json file
- Therefore, the first step is to convert the image to json so that it can be sent to the Inference Service
- We use the same approach to preprocess the image
- Then format it to JSON

In [None]:
def preprocess_image(url):
    """Download an image and turn it into a batch of one for prediction.

    The image is fetched from ``url``, converted to RGB, resized to 224x224,
    scaled to the [0, 1] range and given a leading batch axis. The resized
    image is also displayed in the notebook.

    Args:
        url: HTTP(S) address of the image to download.

    Returns:
        NumPy array holding a single image with a leading batch axis; with
        Keras' default channels-last format this is (1, 224, 224, 3).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the download does not complete in time.
    """
    # Load the image. A timeout keeps the notebook from hanging on a dead
    # host, and raise_for_status() reports HTTP errors directly instead of
    # letting PIL fail on an HTML error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # `response.content` is the fully decoded body (unlike `response.raw`,
    # which skips content decoding). Converting to RGB guarantees three
    # channels for RGBA, palette and greyscale images alike
    # (assumes the model expects RGB input - confirm).
    img = Image.open(io.BytesIO(response.content)).convert("RGB")
    img = img.resize((224, 224))
    img_array = img_to_array(img)
    img_array = img_array / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Display the image
    display(img)

    return img_array

def format_data(data):
    """Serialize an input batch as the JSON body of a prediction request.

    Args:
        data: NumPy array (or any object exposing ``tolist()``) with the
            preprocessed input.

    Returns:
        A JSON string of the form ``{ "instances" : [...] }``.
    """
    # Nested Python lists are JSON-serializable, NumPy arrays are not
    instances_json = json.dumps(data.tolist())

    # Wrap the serialized data in the "instances" envelope
    return '{ "instances" : ' + instances_json + ' }'

### Get the image and transform it to JSON
- `Fill in` the missing values

In [None]:
# Define your labels
# (a sequence of class names; the prediction cell pairs them positionally with
# the scores returned by the model, so the order must match the model's outputs)
labels = "Fill in"

# Specify the image URL here.
image_url = "Fill in"

# Download the image and turn it into a normalized batch of one
preprocessed_image = preprocess_image(image_url)

# Serialize the batch as the JSON body of the prediction request
json_request = format_data(preprocessed_image)

### Create the prediction request
- Use the Serving URL & JSON data generated above
- Create a header that includes the token to authorize the prediction

In [None]:
# Build the request header; include the bearer token when one is available
headers = {}
auth_token = os.environ.get("AUTH_TOKEN")
if auth_token:
    headers["Authorization"] = f"Bearer {auth_token}"

# Make the POST request (the timeout keeps the notebook from hanging forever)
response = requests.post(
    serving_url,
    data=json_request,
    headers=headers,
    verify=False,
    timeout=60,
)

# Print the raw response content
print("Raw Response Content:")
print(response.content.decode('utf-8'))

# Decode the response only if the call succeeded and the body is JSON.
# A substring test is used because the header may carry parameters
# (e.g. "application/json; charset=utf-8").
content_type = response.headers.get("Content-Type", "")
if response.ok and "application/json" in content_type.lower():
    response_data = response.json()
    predictions = response_data['predictions']

    # Convert the scores to percentages with two decimals
    formatted_predictions = [[round(pred * 100, 2) for pred in prediction] for prediction in predictions]

    print("\nTranslated Predictions:")
    for label, prob in zip(labels, formatted_predictions[0]):
        print(f"- {label}: \t{prob}%")

    # Get the predicted label. The argmax is taken over the first (only)
    # instance; taking it over the whole nested list would index into the
    # flattened scores and break as soon as more than one instance is returned.
    predicted_label_index = int(np.argmax(formatted_predictions[0]))
    predicted_label = labels[predicted_label_index]

    print("\nPredicted class label:", predicted_label, "with", formatted_predictions[0][predicted_label_index], "%")
else:
    # Make failures visible instead of silently printing nothing
    print(
        f"\nNo predictions could be decoded "
        f"(HTTP status {response.status_code}, Content-Type: {content_type!r})."
    )

## END