# XGBoost Endpoint - Multiple Models hosted on same instance
<h4>Invoke Specific Model</h4>

In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os
import json

import boto3
import re
from sagemaker import get_execution_role
import sagemaker

# SDK 2 serializers and deserializers
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

In [None]:
# Location of each model artifact served by the endpoint.
# A relative path is enough; the entries are used as `target_model` values.
# TODO - Update your path here
targetModels = [
    "xgboost-bikerental-hyper-one-2021-06-28-17-25-58-077/output/model.tar.gz",
    "xgboost-bikerental-hyper-two-2021-06-28-17-29-46-708/output/model.tar.gz",
]

In [None]:
# Attach a predictor to an endpoint that already exists
# (note this endpoint has two versions of the model).
# Requests are serialized as CSV, so the serializer is supplied at construction.
endpoint_name = 'xgboost-bikerental-hyper'
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint_name,
    serializer=CSVSerializer(),
)

In [None]:
# Load the test data set from the working directory into a DataFrame
df_all = pd.read_csv(filepath_or_buffer="bike_test.csv")

In [None]:
# Preview the first rows of the loaded test data
df_all.head()

In [None]:
# The prediction call takes array-like input: either a numpy array
# or a nested list of values such as [[19,1],[20,1]].
# Keep every column except the first and expose the rows as a numpy array.
arr_test = df_all.iloc[:, 1:].values

In [None]:
# Show the (rows, columns) dimensions of the array passed to the endpoint
arr_test.shape

### Invoke Model One

In [None]:
# On a multi-model endpoint each request has to say which model artifact to use;
# target_model carries the path to that artifact.
# Send the first five rows to the first model. Output is a JSON list.
result = predictor.predict(arr_test[:5], target_model=targetModels[0])
print(result)

### Invoke Model Two

In [None]:
# On a multi-model endpoint each request has to say which model artifact to use;
# target_model carries the path to that artifact.
# Send the first five rows to the second model. Output is a JSON list.
result = predictor.predict(arr_test[:5], target_model=targetModels[1])
print(result)

### Split the input data into chunks
There are thousands of rows in this data set that need inference.  
When communicating over the internet, it is a good idea to split the data into chunks to avoid payload-size and timeout errors.

In [None]:
def inference_by_version(targetModel=None, data=None, num_chunks=10, client=None):
    """Run chunked inference against one model of the endpoint.

    The input rows are split into ``num_chunks`` roughly equal pieces and one
    request is sent per non-empty piece, which keeps every payload small.

    Args:
        targetModel: Model artifact path forwarded as ``target_model``.
        data: Array of input rows to score. When omitted, the notebook-level
            ``arr_test`` is used.
        num_chunks: How many pieces to split the rows into (default 10).
        client: Object exposing ``predict(rows, target_model=...)`` that
            returns UTF-8 encoded JSON bytes holding a list. When omitted,
            the notebook-level ``predictor`` is used.

    Returns:
        A flat list with the float value of every element returned for every
        chunk, in request order.
    """
    # Fall back to the notebook globals only when nothing was passed in,
    # so existing calls keep working unchanged.
    rows = arr_test if data is None else data
    endpoint = predictor if client is None else client

    predictions = []
    for chunk in np.array_split(rows, num_chunks):
        # array_split pads with empty pieces when there are fewer rows than
        # chunks; there is nothing to score in those, so no request is sent.
        if len(chunk) == 0:
            continue
        response = endpoint.predict(chunk, target_model=targetModel)
        # The body is raw bytes containing a JSON list: decode, then parse.
        values = json.loads(response.decode("utf-8"))
        print(chunk.shape)
        predictions.extend(float(v) for v in values)

    return predictions

### Use all available variants

In [None]:
# Score the whole test set once per model and keep each result as a new column.
# expm1 is applied to the raw predictions — presumably the models were trained
# on log1p(count); confirm against the training notebook.
print('model one inference')
df_all["count_hyper_one"] = np.expm1(
    inference_by_version(targetModels[0])
)

print('model two inference')
df_all["count_hyper_two"] = np.expm1(
    inference_by_version(targetModels[1])
)

In [None]:
# Summary statistics of the two prediction columns, side by side
df_all[["count_hyper_one","count_hyper_two"]].describe()

In [None]:
# Delete the endpoint once finished to prevent unnecessary charges.
# NOTE(review): this removes the endpoint named in endpoint_name, which serves
# both models used above — confirm nothing else still depends on it.
predictor.delete_endpoint()