## Multi Model endpoints

In this example we will deploy two different sentiment-analysis models (one English, one German) behind a single multi-model endpoint.
Please note that loading and packaging models may take several minutes.

### Upload model data to S3

In [1]:
import sagemaker
from sagemaker import get_execution_role
import os

# SageMaker session and the IAM role the notebook runs under.
sagemaker_session = sagemaker.Session()
role = get_execution_role()

# Model artifacts are stored in the session's default bucket under `prefix`.
bucket = sagemaker_session.default_bucket()
prefix = "multi-model"
s3_path = f"s3://{bucket}/{prefix}"
region = sagemaker_session.boto_region_name


# Deploy Multi Model Endpoint


In [55]:
# Model identifiers passed to `from_pretrained` in the cells below.
EN_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
GER_MODEL = "oliverguhr/german-sentiment-bert"

In [56]:
# retrieve english model

import os

import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Load the pretrained English tokenizer and classifier.
en_tokenizer = DistilBertTokenizer.from_pretrained(EN_MODEL)
en_model = DistilBertForSequenceClassification.from_pretrained(EN_MODEL)

# Smoke-test on a single sentence; gradients are not needed for inference.
inputs = en_tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = en_model(**inputs).logits

# Map the arg-max logit index to its label through the model config.
predicted_class_id = logits.argmax().item()
predictions = en_model.config.id2label[predicted_class_id]
print(predictions)

# Write model and tokenizer files into the local packaging directory.
en_model_path = "models/english_sentiment"
os.makedirs(en_model_path, exist_ok=True)
en_model.save_pretrained(en_model_path)
en_tokenizer.save_pretrained(en_model_path)


POSITIVE


('models/english_sentiment/tokenizer_config.json',
 'models/english_sentiment/special_tokens_map.json',
 'models/english_sentiment/vocab.txt',
 'models/english_sentiment/added_tokens.json')

In [57]:
# retrieve German model

import torch

from transformers import BertTokenizer, BertForSequenceClassification

# Load the pretrained German tokenizer and classifier.
ger_tokenizer = BertTokenizer.from_pretrained(GER_MODEL)
ger_model = BertForSequenceClassification.from_pretrained(GER_MODEL)

# Smoke-test on a single sentence; gradients are not needed for inference.
inputs = ger_tokenizer("Das ist gar nicht mal so gut", return_tensors="pt")
with torch.no_grad():
    logits = ger_model(**inputs).logits

# Map the arg-max logit index to its label through the model config.
predicted_class_id = logits.argmax().item()
predictions = ger_model.config.id2label[predicted_class_id]

print(predictions)

ger_model_path = "models/german_sentiment"
os.makedirs(ger_model_path, exist_ok=True)

# Save the GERMAN model and tokenizer here. Saving the English objects into
# this directory would package the wrong model into german_sentiment.tar.gz.
ger_model.save_pretrained(save_directory=ger_model_path)
ger_tokenizer.save_pretrained(save_directory=ger_model_path)


negative


('models/german_sentiment/tokenizer_config.json',
 'models/german_sentiment/special_tokens_map.json',
 'models/german_sentiment/vocab.txt',
 'models/german_sentiment/added_tokens.json')

# Create Inference Scripts

In [None]:
# Display the English inference script that is copied into the model archive below.
! pygmentize 1_src/en_inference.py

In [None]:
! pygmentize 2_src/get_inference.py

# Package models

In [None]:
! mkdir models/english_sentiment/code
! cp 1_src/en_inference.py models/english_sentiment/code/inference.py
! tar -czvf models/english_sentiment.tar.gz -C models/english_sentiment/ .


In [None]:
! mkdir models/german_sentiment/code
! cp 1_src/ger_inference.py models/german_sentiment/code/inference.py
! tar -czvf models/german_sentiment.tar.gz -C models/german_sentiment/ .


In [60]:
# Upload both model archives to the same S3 prefix.
en_model_data = sagemaker_session.upload_data(
    'models/english_sentiment.tar.gz',
    bucket=bucket,
    key_prefix=prefix,
)
ger_model_data = sagemaker_session.upload_data(
    'models/german_sentiment.tar.gz',
    bucket=bucket,
    key_prefix=prefix,
)


In [62]:
from sagemaker import image_uris

# Framework versions used to select the Hugging Face inference image.
HF_VERSION = '4.17.0'
PT_VERSION = 'pytorch1.10.2'
TF_VERSION = "tensorflow2.6.3"  # not used by the PyTorch image lookup below

# py_version and container_version are not passed, so they are left to the SDK.
pt_container_uri = image_uris.retrieve(
    framework='huggingface',
    region=region,
    version=HF_VERSION,
    base_framework_version=PT_VERSION,
    image_scope='inference',
    instance_type='ml.c5.xlarge',
)

print(pt_container_uri)

763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-cpu-py38-ubuntu20.04


In [63]:
# S3 prefix (with trailing slash) under which both model archives were uploaded.
mm_data_path = f"s3://{bucket}/{prefix}/"

In [64]:
# Container definition for the model: Mode is 'MultiModel' and ModelDataUrl
# points at the S3 prefix that holds the model archives.
container = {
    "Image": pt_container_uri,
    "ContainerHostname": "MultiModel",
    "Mode": "MultiModel",
    "ModelDataUrl": mm_data_path,
    "Environment": {
        "SAGEMAKER_PROGRAM": "inference.py",
        "SAGEMAKER_SUBMIT_DIRECTORY": mm_data_path,
    },
}


## Create Multi Model Endpoint

In [65]:
# Clients taken from the SageMaker session: `sm_client` is used below to
# create, describe and delete resources; `runtime_sm_client` invokes the endpoint.
sm_client = sagemaker_session.sagemaker_client
runtime_sm_client = sagemaker_session.sagemaker_runtime_client

# Instance type used for the endpoint's production variant.
instance_type = "ml.m5.4xlarge"

In [66]:
import datetime

# A timestamp suffix keeps resource names distinct between runs.
unique_id = format(datetime.datetime.now(), "%Y-%m-%d%H-%M-%S")
model_name = f"mme-sentiment-model-{unique_id}"

# Register the model using the multi-model container definition.
create_model_response = sm_client.create_model(
    ExecutionRoleArn=role,
    PrimaryContainer=container,
    ModelName=model_name,
)

In [67]:
# Endpoint configuration: a single variant serving the model on one instance.
endpoint_config_name = f"{model_name}-ep-config"

production_variant = {
    "VariantName": "prod",
    "ModelName": model_name,
    "InitialInstanceCount": 1,
    "InstanceType": instance_type,
}

endpoint_config = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

In [68]:
# Start endpoint creation from the configuration above; the next cell polls
# until the endpoint leaves the "Creating" state.
endpoint_name = f"{model_name}-ep"

endpoint = sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)

In [69]:
import time 

# Poll once a minute until the endpoint leaves the "Creating" state.
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

# Stop here if creation did not succeed, so the invocation cells below do not
# fail later with a less helpful error.
if status != "InService":
    raise RuntimeError(
        f"Endpoint {endpoint_name} ended in status {status}: "
        f"{resp.get('FailureReason', 'no failure reason reported')}"
    )

Status: Creating
Status: Creating
Status: Creating


In [27]:
# Show the generated resource names, one per line.
print(endpoint_name, model_name, sep="\n")

mce-nlp-model-2022-08-2511-01-19-ep
mce-nlp-model-2022-08-2511-01-19


## Testing Multi Model Endpoint

In [52]:
import json

# One sample sentence per language, sent to the matching model below.
ger_input = "Der Test verlief positiv."
en_input = "Test results are positive."

In [None]:
# Invoke the endpoint for the English model. TargetModel names the archive
# that was uploaded under the S3 prefix used as the model's ModelDataUrl.
en_response = runtime_sm_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept="application/json",
    TargetModel="english_sentiment.tar.gz",
    Body=json.dumps(en_input),
)

In [46]:
# Read the response body, decode it and parse the JSON payload.
predictions = json.loads(en_response["Body"].read().decode())
print(predictions)

In [47]:
# Invoke the endpoint for the German model. TargetModel names the archive
# that was uploaded under the S3 prefix used as the model's ModelDataUrl.
ger_response = runtime_sm_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept="application/json",
    TargetModel="german_sentiment.tar.gz",
    Body=json.dumps(ger_input),
)

{'score': 0.7555540800094604, 'start': 77, 'end': 92, 'answer': 'Selva Amazónica'}


In [None]:
# Read the response body, decode it and parse the JSON payload.
predictions = json.loads(ger_response["Body"].read().decode())
print(predictions)

# Clean Up

In [54]:
# Remove every resource created above: the endpoint, its configuration and
# the model. The endpoint configuration would otherwise be left behind.
sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
sm_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': 'a4628488-6003-4618-8569-e9c6541d79c2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a4628488-6003-4618-8569-e9c6541d79c2',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 25 Aug 2022 17:34:35 GMT'},
  'RetryAttempts': 0}}