In [None]:
import sagemaker
import boto3
from sagemaker.model import Model
from sagemaker.model import FrameworkModel
#from sagemaker.pipeline_model import PipelineModel
from sagemaker.pipeline import PipelineModel
from sagemaker import get_execution_role

from time import gmtime, strftime

from sagemaker.predictor import json_serializer, json_deserializer, Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

In [None]:
# IAM execution role the notebook runs under; reused as `role=` for every
# Model / PipelineModel defined further down.
role = get_execution_role()

# Shared SageMaker session object for the notebook.
sagemaker_session = sagemaker.Session()

In [None]:
# Resource names are made unique by a UTC timestamp suffix.
# The timestamp is computed once so all three names are guaranteed to share
# the same suffix (separate strftime() calls can straddle a second boundary).
name_suffix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

model_a_name = 'mock-model-a-' + name_suffix
model_b_name = 'mock-model-b-' + name_suffix
model_pipeline_name = 'mock-model-pipeline-' + name_suffix

 --- --- Version 1 - working example --- ---
 
This example uses the same container image (the same `container_image_uri`) for every container in the serial pipeline.  This is by design and purely a convenience: the pipeline is built from multiple copies of the same container.

In [None]:
# Assemble the ECR image URI used by the Version 1 containers.
region = boto3.Session().region_name
# Fifth colon-separated field of the role ARN, used here as the account id.
account_id = role.split(':')[4]
ecr_repository_name = "mckv"

container_image_uri = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'.format(
    account_id,
    region,
    ecr_repository_name,
)
print(container_image_uri)

In [None]:
# Model A (Version 1): served from the shared container image.
# Environment variables handed to the serving container.
model_a_env = {
    'SAGEMAKER_USE_NGINX': 'true',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '300',
}

# Optional arguments intentionally left out here:
#   code_location=s3_code_location
#   sagemaker_session=sagemaker_session  (keep it out for local mode)
model_a = Model(
    name=model_a_name,
    image_uri=container_image_uri,
    role=role,
    env=model_a_env,
)

In [None]:
# Model B (Version 1): same container image as model A, registered under its own name.
# Environment variables handed to the serving container.
model_b_env = {
    'SAGEMAKER_USE_NGINX': 'true',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '300',
}

# Optional arguments intentionally left out here:
#   code_location=s3_code_location
#   sagemaker_session=sagemaker_session  (keep it out for local mode)
model_b = Model(
    name=model_b_name,
    image_uri=container_image_uri,
    role=role,
    env=model_b_env,
)

In [None]:
# Endpoint name for Version 1, made unique with a UTC timestamp.
endpoint_suffix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = 'mock-models-ep-' + endpoint_suffix
print(endpoint_name)

In [None]:
# Build the SageMaker inference pipeline (serial mode) for Version 1.
# Only model_a is chained at the moment; add model_b to this list to
# run a second container after it.
pipeline_steps = [model_a]

pipeline_model = PipelineModel(
    models=pipeline_steps,
    role=role,
    name=model_pipeline_name,
)

In [None]:
# Deploy the Version 1 pipeline to a single-instance endpoint.
# "ml.m5.large" is an alternative instance type that was tried here.
pred = pipeline_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.c4.xlarge",
    initial_instance_count=1,
)

In [None]:
# Client for the deployed Version 1 endpoint.
# Reuses the notebook-wide `sagemaker_session` instead of constructing a
# second Session. Requests and responses are JSON-encoded by the
# serializer/deserializer pair.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [None]:
# Sample request body sent to the Version 1 endpoint.
payload = dict(key="SomeKeyName", value=10)

In [None]:
# Invoke the endpoint with the sample payload and show the decoded reply.
prediction = predictor.predict(payload)
print(prediction)

In [None]:
# Tear down what Version 1 created so nothing keeps accruing charges.
predictor.delete_endpoint()

# Deleting the endpoint does not remove the model that deploy() registered.
# Remove it as well; otherwise a later deploy under the same model name
# would pick up this stale single-container model instead of creating a new one.
pipeline_model.delete_model()

 --- --- Version 2 - working example --- ---
 
This example uses a different container_image_uri variable (a and b) for each container in the serial pipeline.  Although the variable names differ, the same container is still used to illustrate Pipeline Inference.  This is by design, as it makes it "easier" to plug in your own containers to experiment.

In [None]:
# Image URI for container A (Version 2).
region = boto3.Session().region_name
# Fifth colon-separated field of the role ARN, used here as the account id.
account_id = role.split(':')[4]
ecr_repository_name_a = "mckv"

container_image_uri_a = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'.format(
    account_id,
    region,
    ecr_repository_name_a,
)
print(container_image_uri_a)

In [None]:
# Image URI for container B (Version 2). Point `ecr_repository_name_b`
# at a different repository to try a second, distinct container.
region = boto3.Session().region_name
# Fifth colon-separated field of the role ARN, used here as the account id.
account_id = role.split(':')[4]
ecr_repository_name_b = "mckv"

container_image_uri_b = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'.format(
    account_id,
    region,
    ecr_repository_name_b,
)
print(container_image_uri_b)

In [None]:
# Model A (Version 2): served from container image A.
# Environment variables handed to the serving container.
model_a_env = {
    'SAGEMAKER_USE_NGINX': 'true',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '300',
}

model_a = Model(
    name=model_a_name,
    image_uri=container_image_uri_a,
    role=role,
    env=model_a_env,
)

In [None]:
# Model B (Version 2): served from container image B.
# Environment variables handed to the serving container.
model_b_env = {
    'SAGEMAKER_USE_NGINX': 'true',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '300',
}

model_b = Model(
    name=model_b_name,
    image_uri=container_image_uri_b,
    role=role,
    env=model_b_env,
)

In [None]:
# Fresh endpoint name for Version 2, made unique with a UTC timestamp.
endpoint_suffix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = 'mock-models-ep-' + endpoint_suffix
print(endpoint_name)

In [None]:
# Create SageMaker Inference Pipeline, serial

# Version 1 already deployed a pipeline model under the original
# `model_pipeline_name`, and deleting an endpoint leaves its model registered.
# Generate a fresh name so this two-container pipeline is created as a new
# model rather than colliding with (or silently reusing) the earlier one.
model_pipeline_name = 'mock-model-pipeline-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

pipeline_model = PipelineModel(
    name=model_pipeline_name,
    role=role,
    # Containers run in list order: model_a first, then model_b.
    models=[
        model_a,
        model_b,
    ],
)

In [None]:
# Deploy the Version 2 pipeline to a single-instance endpoint.
# "ml.m5.large" is an alternative instance type that was tried here.
pred = pipeline_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.c4.xlarge",
    initial_instance_count=1,
)

In [None]:
# Client for the deployed Version 2 endpoint.
# Reuses the notebook-wide `sagemaker_session` instead of constructing a
# second Session. Requests and responses are JSON-encoded by the
# serializer/deserializer pair.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [None]:
# Sample request body sent to the Version 2 endpoint.
payload = dict(key="sme", value=10)

In [None]:
# Invoke the endpoint with the sample payload and show the decoded reply.
prediction = predictor.predict(payload)
print(prediction)

Clean up your resources to avoid extra billing: Endpoints, Endpoint Configurations, and Models.

In [None]:
# Tear down what Version 2 created so nothing keeps accruing charges.
predictor.delete_endpoint()

# Deleting the endpoint does not remove the model that deploy() registered;
# delete the pipeline model too so no stale model is left behind.
pipeline_model.delete_model()