In [1]:
import torch
import boto3
import tarfile
import os
import json
import sagemaker
from botocore.client import ClientError
from sagemaker import image_uris, get_execution_role
from time import gmtime, strftime

In [2]:
!wget https://download.pytorch.org/models/alexnet-owt-7be5be79.pth
!mkdir -p data/alexnet 2> /dev/null
!mv alexnet-owt-7be5be79.pth data/alexnet/
## !mv data/alexnet/alexnet-owt-7be5be79.pth data/alexnet/alexnet-owt-7be5be79.pt
!mkdir docker 2> /dev/null
!cp imagenet_classes.txt docker/

--2022-11-04 15:28:38--  https://download.pytorch.org/models/alexnet-owt-7be5be79.pth
Resolving download.pytorch.org (download.pytorch.org)... 18.160.37.73, 18.160.37.47, 18.160.37.94, ...
Connecting to download.pytorch.org (download.pytorch.org)|18.160.37.73|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 244408911 (233M) [application/x-www-form-urlencoded]
Saving to: ‘alexnet-owt-7be5be79.pth’


2022-11-04 15:28:38 (320 MB/s) - ‘alexnet-owt-7be5be79.pth’ saved [244408911/244408911]



In [3]:
%%writefile docker/entrypoint.py
import os
import shlex
import subprocess
import sys
from subprocess import CalledProcessError

from retrying import retry
from sagemaker_inference import model_server


def _retry_if_error(exception):
    return isinstance(exception, CalledProcessError or OSError)


@retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error)
def _start_mms():
    # by default the number of workers per model is 1, but we can configure it through the
    # environment variable below if desired.
    # os.environ['SAGEMAKER_MODEL_SERVER_WORKERS'] = '2'
    model_server.start_model_server(handler_service="/home/model-server/model_handler.py:handle")


def main():
    if sys.argv[1] == "serve":
        _start_mms()
    else:
        subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))

    # prevent docker exit
    subprocess.call(["tail", "-f", "/dev/null"])


main()

Overwriting docker/entrypoint.py


In [4]:
%%writefile docker/model_handler.py
"""
ModelHandler defines an example model handler for load and inference requests for MXNet CPU models
"""
from sagemaker_inference import content_types, decoder, default_inference_handler, encoder, errors
from PIL import Image
from torchvision import transforms, models
import os
import io
import logging
import json
import torch


class ModelHandler(object):
    """
    A sample Model handler implementation.
    """

    def __init__(self):
        self.initialized = False
        self.py_model = None
        self.transform = None

    def initialize(self, context):
        """
        Initialize model. This will be called during model loading time
        :param context: Initial context contains model server system properties.
        :return:
        """
        logging.info('initialize')
        self.initialized = True
        properties = context.system_properties
        # Contains the url parameter passed to the load request
        model_dir = properties.get("model_dir")

        # Load PyTorch model
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.py_model = models.alexnet()
        state_dict = torch.load(os.path.join(model_dir, 'alexnet-owt-7be5be79.pth'))
        self.py_model.load_state_dict(state_dict)
        self.py_model.eval()
        
        # Setup transformations
        self.transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )])

    def preprocess(self, request):
        """
        Transform raw input into model input data.
        :param request: list of raw requests
        :return: list of preprocessed model input data
        """
        # Take the input data and pre-process it make it inference ready
        # Read the bytearray of the image from the input
        logging.info('preprocess')
        #logging.info(request)
        img = Image.open(io.BytesIO(request[0]['body'])) ## We always assume that this is one image
        img_t = self.transform(img)
        batch_t = torch.unsqueeze(img_t, 0)
        return [batch_t]

    def inference(self, model_input):
        """
        Internal inference methods
        :param model_input: transformed model input data list
        :return: list of inference output in NDArray
        """
        logging.info('preprocess')
        return [self.py_model(model_input[0])]

    def postprocess(self, inference_output):
        """
        Return predict result in as list.
        :param inference_output: list of inference output
        :return: list of predict results
        """
        logging.info('postprocess')
        with open('/imagenet_classes.txt') as f:
            classes = [line.strip() for line in f.readlines()]
        _, index = torch.max(inference_output[0], 1)
        #percentage = torch.nn.functional.softmax(inference_output[0], dim=1)[0] * 100
        ## Get the most likely class
        mostlikely = [classes[index[0]]]
        logging.info(mostlikely)
        return [mostlikely]

    def handle(self, data, context):
        """
        Call preprocess, inference and post-process functions
        :param data: input data
        :param context: mms context
        """
        logging.info('handle')
        model_input = self.preprocess(data)
        model_out = self.inference(model_input)
        return self.postprocess(model_out)


_service = ModelHandler()


def handle(data, context):
    logging.info('handle')
    if not _service.initialized:
        _service.initialize(context)

    if data is None:
        return None

    return _service.handle(data, context)

Overwriting docker/model_handler.py


In [5]:
%%writefile docker/Dockerfile
FROM ubuntu:18.04

# Set a docker label to advertise multi-model support on the container
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
# Set a docker label to enable container to use SAGEMAKER_BIND_TO_PORT environment variable if present
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true


# Upgrade installed packages
RUN apt-get update && apt-get upgrade -y && apt-get clean

# Python package management and basic dependencies
RUN apt-get install -y curl python3.7 python3.7-dev python3.7-distutils

# Register the version in alternatives
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.7 1

# Set python 3 as the default python
RUN update-alternatives --set python /usr/bin/python3.7

# Install necessary dependencies for MMS and SageMaker Inference Toolkit
RUN apt-get -y install --no-install-recommends \
    build-essential \
    ca-certificates \
    openjdk-8-jdk-headless \
    curl \
    vim \
    && rm -rf /var/lib/apt/lists/* \
    && python --version \
    && curl -O https://bootstrap.pypa.io/get-pip.py \
    && python get-pip.py

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
RUN update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1

# Install MMS, and SageMaker Inference Toolkit to set up MMS
RUN pip3 --no-cache-dir install multi-model-server \
                                sagemaker-inference \
                                retrying

RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu

# Copy entrypoint script to the image
COPY entrypoint.py /usr/local/bin/entrypoint.py
RUN chmod +x /usr/local/bin/entrypoint.py

RUN mkdir -p /home/model-server/

# Copy the default custom service file to handle incoming data and inference requests
COPY model_handler.py /home/model-server/model_handler.py

# Copy the imagenet classes, we will need these for postprocessing
COPY imagenet_classes.txt /imagenet_classes.txt

# Define an entrypoint script for the docker image
ENTRYPOINT ["python", "/usr/local/bin/entrypoint.py"]

# Define command to be passed to the entrypoint
CMD ["serve"]

Overwriting docker/Dockerfile


In [6]:
%%writefile docker/build_and_push.sh
# The name of our algorithm
algorithm_name=sagemaker-workshop-inf

cd docker

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
# aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}
$(aws ecr get-login --region ${region} --no-include-email)

# Get the login command from ECR in order to pull down the SageMaker PyTorch image
#$(aws ecr get-login --registry-ids 763104351884 --region ${region} --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t ${algorithm_name} . --build-arg REGION=${region}
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Overwriting docker/build_and_push.sh


In [7]:
!chmod +x docker/build_and_push.sh && docker/build_and_push.sh

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon   25.6kB
Step 1/19 : FROM ubuntu:18.04
 ---> 71eaf13299f4
Step 2/19 : LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
 ---> Using cache
 ---> e2588020736f
Step 3/19 : LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
 ---> Using cache
 ---> d3c71f575f5a
Step 4/19 : RUN apt-get update && apt-get upgrade -y && apt-get clean
 ---> Using cache
 ---> 71de34bdbb79
Step 5/19 : RUN apt-get install -y curl python3.7 python3.7-dev python3.7-distutils
 ---> Using cache
 ---> 1ca54a57b0cc
Step 6/19 : RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.7 1
 ---> Using cache
 ---> 83c7496f57fb
Step 7/19 : RUN update-alternatives --set python /usr/bin/python3.7
 ---> Using cache
 ---> a41806cc2d47
Step 8/19 : RUN apt-get -y install --no-install-recommends     build-essential     ca-certificates     openjdk-8-jdk-head

In [8]:
sm_client = boto3.client(service_name="sagemaker")
runtime_sm_client = boto3.client(service_name="sagemaker-runtime")

account_id = boto3.client("sts").get_caller_identity()["Account"]
region = boto3.Session().region_name

bucket = "sagemaker-workshop-{}-{}".format(region, account_id)
prefix = "sagemaker-workshop-endpoint"

role = get_execution_role()

In [9]:
with tarfile.open("data/alexnet.tar.gz", "w:gz") as tar:
    tar.add("data/alexnet", arcname=".")

In [10]:
s3 = boto3.resource("s3")
try:
    s3.meta.client.head_bucket(Bucket=bucket)
except ClientError:
    s3.create_bucket(Bucket=bucket)

models = {"alexnet.tar.gz"}

for model in models:
    key = os.path.join(prefix, model)
    with open("data/" + model, "rb") as file_obj:
        s3.Bucket(bucket).Object(key).upload_fileobj(file_obj)

In [11]:
model_name = "sagemaker-workshop" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
## You will have to replace this URL with your specific URL, your may look something like shown below
# model_url = "https://sagemaker-workshop-us-east-1-<ACCOUNT_ID>}.s3.amazonaws.com/<YOUR_PREFIX>/"
model_url = "https://{}.s3.amazonaws.com/{}/".format(bucket,prefix)
container = "{}.dkr.ecr.{}.amazonaws.com/{}:latest".format(
    account_id, region, "sagemaker-workshop-inf"
)

print("Model name: " + model_name)
print("Model data Url: " + model_url)
print("Container image: " + container)

container = {"Image": container, "ModelDataUrl": model_url, "Mode": "MultiModel"}

create_model_response = sm_client.create_model(
    ModelName=model_name, ExecutionRoleArn=role, Containers=[container]
)

print("Model Arn: " + create_model_response["ModelArn"])

Model name: sagemaker-workshop2022-11-04-15-29-16
Model data Url: https://sagemaker-workshop-us-east-1-684473352813.s3.amazonaws.com/sagemaker-workshop-endpoint/
Container image: 684473352813.dkr.ecr.us-east-1.amazonaws.com/sagemaker-workshop-inf:latest
Model Arn: arn:aws:sagemaker:us-east-1:684473352813:model/sagemaker-workshop2022-11-04-15-29-16


In [12]:
endpoint_config_name = "sagemaker-WorkshopEndpointConfig-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("Endpoint config name: " + endpoint_config_name)

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "InstanceType": "ml.m5.xlarge",
#            "InstanceType": "local",
            "InitialInstanceCount": 1,
            "InitialVariantWeight": 1,
            "ModelName": model_name,
            "VariantName": "AllTraffic-New-3",
        }
    ],
)

print("Endpoint config Arn: " + create_endpoint_config_response["EndpointConfigArn"])

Endpoint config name: sagemaker-WorkshopEndpointConfig-2022-11-04-15-29-19
Endpoint config Arn: arn:aws:sagemaker:us-east-1:684473352813:endpoint-config/sagemaker-workshopendpointconfig-2022-11-04-15-29-19


In [13]:
endpoint_name = "sagemaker-WorkshopEndpoint-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("Endpoint name: " + endpoint_name)

create_endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)
print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Endpoint Status: " + status)

print("Waiting for {} endpoint to be in service...".format(endpoint_name))
waiter = sm_client.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=endpoint_name)

Endpoint name: sagemaker-WorkshopEndpoint-2022-11-04-15-29-25
Endpoint Arn: arn:aws:sagemaker:us-east-1:684473352813:endpoint/sagemaker-workshopendpoint-2022-11-04-15-29-25
Endpoint Status: Creating
Waiting for sagemaker-WorkshopEndpoint-2022-11-04-15-29-25 endpoint to be in service...


In [17]:
with open("kitten.jpg","rb") as f:
    payload = bytearray(f.read())

In [18]:
workshop_predictor = sagemaker.predictor.Predictor(endpoint_name,
                                                   sagemaker_session=None
                                                  )
predicted_value = workshop_predictor.predict(payload,target_model="alexnet.tar.gz")
print(predicted_value)

b'[\n  "tabby"\n]'


In [20]:
response = runtime_sm_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="image/jpeg",
    TargetModel="alexnet.tar.gz",  # this is the rest of the S3 path where the model artifacts are located
    Body=payload,
)

print(json.loads(response['Body'].read()))

['tabby']


In [21]:
sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
sm_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': 'aec7e3d8-ad8b-4cf2-8361-d7700b3f07aa',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'aec7e3d8-ad8b-4cf2-8361-d7700b3f07aa',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Fri, 04 Nov 2022 15:35:10 GMT'},
  'RetryAttempts': 0}}

In [22]:
print(response)

{'ResponseMetadata': {'RequestId': '7cfcdbc1-fbb5-465d-bde9-155b3104a983', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '7cfcdbc1-fbb5-465d-bde9-155b3104a983', 'x-amzn-invoked-production-variant': 'AllTraffic-New-3', 'date': 'Fri, 04 Nov 2022 15:33:27 GMT', 'content-type': 'application/json', 'content-length': '13'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic-New-3', 'Body': <botocore.response.StreamingBody object at 0x7f02f9d8a970>}
