# POC 5

## Inference

Let's walk through deploying a DCN model in SageMaker!

In [None]:
#!pip install easydict

In [None]:
%pylab inline
import os
import boto3
import time
import io
from matplotlib.pyplot import imshow, imread
import subprocess
from skimage.transform import resize as skresize
from PIL import Image
import requests
import cv2
import random
import numpy as np
import json
import sagemaker
from sagemaker.predictor import StringDeserializer
from sagemaker.predictor import RealTimePredictor, json_deserializer
import image_pb2 as impb
import ast
from IPython.display import clear_output
from dcn.lib.utils.show_boxes import show_boxes


# AWS handles shared by the rest of the notebook.
s3 = boto3.client('s3')

# Swap in LocalSession() here to run the container locally instead.
sess = sagemaker.Session()
role = sagemaker.session.get_execution_role()

# Bucket used for the model artifacts. sess.default_bucket() is the
# alternative if you do not want to supply your own bucket.
bucket = 'ar54'

#!cp /home/ec2-user/SageMaker/SageMaker-Inference-Advanced/n-labs/11-manual-model-load/dcn/model/rfcn_dcn_coco-0000.params dcn/model/

Let's take a look at our inference script.

In [None]:
# Print inference.py with syntax highlighting (pygmentize is the Pygments CLI).
!pygmentize inference.py

To deploy our model to an endpoint, we first need to package the model weights as a gzip-compressed tar archive (`.tar.gz`).

In [None]:
# Archive the contents of dcn/model/ into /tmp/model-rfcn.tar.gz.
# tar runs from inside the directory so the members are stored without a
# dcn/model/ path prefix; note that `*` does not match dotfiles.
!(cd dcn/model/ && tar -czvf /tmp/model-rfcn.tar.gz *)

In [None]:
s3.upload_file('/tmp/model-rfcn.tar.gz', bucket, 'super_models/model-rfcn.tar.gz')
!aws s3 ls s3://ar54/super_models/   

Now we build the container image we will use for DCN inference and push it to Amazon ECR.

In [None]:
%%sh
# Build the inference image from Dockerfile.gpu and push it to this account's
# ECR registry. Abort on the first failing step so that a broken build is
# never tagged or pushed.
set -e

# The name of our algorithm (also used as the ECR repository name)
algorithm_name=mxnet-serving-160-gpu-py2

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration. No default is applied:
# without a region the registry URI below would be malformed, so fail early.
region=$(aws configure get region || true)
if [ -z "${region}" ]
then
    echo "No AWS region configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly.
# NOTE(review): `aws ecr get-login` exists only in AWS CLI v1. On CLI v2 use
# `aws ecr get-login-password | docker login --username AWS --password-stdin <registry>`.
$(aws ecr get-login --region "${region}" --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" -f Dockerfile.gpu .

docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

Now that we have built our container, we can deploy our endpoint. This process usually takes some time.

In [None]:
%%time

from sagemaker.mxnet import MXNetModel

model_data="s3://ar54/super_models/model-rfcn.tar.gz"

model = MXNetModel(
    model_data=model_data,
    role=role,
    image="308412838853.dkr.ecr.us-east-2.amazonaws.com/mxnet-serving-160-gpu-py2:latest",
    entry_point="inference.py",
    py_version='py2',
    framework_version='1.6.0',
    enable_cloudwatch_metrics=True
)

# for deploying the endpoint locally for testing we can use an instance_type of local
#predictor = model.deploy(instance_type="local_gpu", initial_instance_count=1)
#predictor = model.deploy(instance_type="local", initial_instance_count=1)

predictor = model.deploy(instance_type='ml.g4dn.2xlarge', initial_instance_count=1)

In [None]:
# Label names passed to show_boxes; the order is significant, so rows are only
# regrouped by theme for readability.
classes = [
    # people and vehicles
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    # street fixtures
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    # animals
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    # bags and accessories
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    # sports equipment
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket',
    # tableware
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    # food
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    # furniture
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    # electronics
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    # appliances
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    # miscellaneous indoor objects
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# NOTE(review): one more than len(classes) -- presumably the extra slot is a
# background class; confirm against the model definition.
num_classes = 81

Now that we have deployed our endpoint, let's test it out!
We are going to package our image into a protobuf object and then send it to our endpoint for classification.
Right now it takes approximately 60 ms end to end.

In [None]:
# Load a sample image from the dcn/demo directory and preview it inline.
impath = os.path.join(
    '/home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo',
    'COCO_test2015_000000000891.jpg'
)

img = imread(impath)
imshow(img)

In [None]:
# Switch to the image that will be sent to the endpoint.
impath = os.path.join(
    '/home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo',
    '769452-1.2.jpg'
)

In [None]:
%%time

with open(impath, 'rb') as f:
    payload = f.read()

image_packet = impb.PBImage()
image_packet.image_data = payload

predictor.serializer = None
predictor.deserializer = StringDeserializer()
predictor.accept = None
predictor.content_type = 'application/octet-stream'

response = predictor.predict(image_packet.SerializeToString())          


In [None]:
!ls -alh /home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo/769452-1.2.jpg

Let's look at our image prior to classification

In [None]:
# Image whose detections are parsed and displayed in the following cells.
impath = os.path.join(
    '/home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo',
    '769452-1.2.jpg'
)

In [None]:
# Turn each entry under the response's 'array' key into a NumPy array.
# NOTE(review): eval() executes whatever text the endpoint returned. If the
# payload is a plain Python literal, ast.literal_eval would be the safer
# parser -- confirm the response format before switching.
resp_list = [np.array(entry) for entry in eval(response)['array']]

In [None]:
# Draw the detections on the image that was actually sent to the endpoint.
# `img` still holds the first demo picture loaded earlier, so re-read `impath`
# here. cv2.imread returns BGR, which is what the BGR->RGB conversion expects
# (matplotlib's imread already yields RGB, so converting it swapped the colours).
im = cv2.cvtColor(cv2.imread(impath), cv2.COLOR_BGR2RGB)
show_boxes(im, resp_list, classes, 1)

In [None]:
impath = '/home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo/769452-1.2.jpg'
#impath = '/home/ec2-user/SageMaker/GitHub/SageMaker-CustomMXNet-Autoscaling/Lab2/dcn/demo/769452-2.4.jpg'
img = imread(impath)

with open(impath, 'rb') as f:
    payload = f.read()

image_packet = impb.PBImage()
image_packet.image_data = payload

predictor.serializer = None
predictor.deserializer = StringDeserializer()
predictor.accept = None
predictor.content_type = 'application/octet-stream'

%timeit response = predictor.predict(image_packet.SerializeToString())          

resp_list = []
for e in eval(response)['array']:
    resp_list.append(np.array(e))
    
im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
show_boxes(im, resp_list, classes, 1)    

In [None]:
import numpy as np 
import datetime
import math
import time
import boto3   
import matplotlib.pyplot as plt

# Benchmark parameters: the endpoint to query for CloudWatch metrics and the
# number of sequential requests sent by the latency loop.
endpoint_name=predictor.endpoint
total_runs=1000

In [None]:
print('Running {} inferences for {}:'.format(total_runs, endpoint_name))

# One header/row template for both tables so the five columns cannot drift
# apart (the P100 value used to be dropped by a four-placeholder format).
latency_header = 'Avg | P50 | P90 | P95 | P100'
latency_row = '{:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f}\n'

# --- Client side: time every request, in milliseconds -----------------------
client_times = []
cw_start = datetime.datetime.utcnow()

for i in range(total_runs):
    client_start = time.time()

    response = predictor.predict(image_packet.SerializeToString())

    client_end = time.time()
    client_times.append((client_end - client_start) * 1000)  # seconds -> ms

cw_end = datetime.datetime.utcnow()

print('Client end-to-end latency percentiles:')
client_avg = np.mean(client_times)
client_p50 = np.percentile(client_times, 50)
client_p90 = np.percentile(client_times, 90)
client_p95 = np.percentile(client_times, 95)
client_p100 = np.percentile(client_times, 100)
print(latency_header)
print(latency_row.format(client_avg, client_p50, client_p90, client_p95, client_p100))

# --- Server side: ModelLatency as reported by CloudWatch --------------------
print('Getting Cloudwatch:')
cloudwatch = boto3.client('cloudwatch')
statistics = ['SampleCount', 'Average', 'Minimum', 'Maximum']
extended = ['p50', 'p90', 'p95', 'p100']

# Give 5 minute buffer to end
cw_end += datetime.timedelta(minutes=5)

# Period must be 1, 5, 10, 30, or multiple of 60
# Round the queried window up to a whole number of minutes
factor = math.ceil((cw_end - cw_start).total_seconds() / 60)
period = int(factor * 60)

print('Time elapsed: {} seconds'.format((cw_end - cw_start).total_seconds()))
print('Using period of {} seconds\n'.format(period))

# ModelLatency is reported in microseconds; convert to milliseconds so the
# numbers are comparable with the client-side table. (Dividing by total_runs,
# as before, was only correct when total_runs happened to be 1000.)
microseconds_per_ms = 1000.0

cloudwatch_ready = False
# Keep polling CloudWatch metrics until datapoints are available
while not cloudwatch_ready:
    print('Waiting 30 seconds ...')
    time.sleep(30)
    # Must use default units of microseconds
    model_latency_metrics = cloudwatch.get_metric_statistics(MetricName='ModelLatency',
                                                             Dimensions=[{'Name': 'EndpointName',
                                                                          'Value': endpoint_name},
                                                                         {'Name': 'VariantName',
                                                                          'Value': "AllTraffic"}],
                                                             Namespace="AWS/SageMaker",
                                                             StartTime=cw_start,
                                                             EndTime=cw_end,
                                                             Period=period,
                                                             Statistics=statistics,
                                                             ExtendedStatistics=extended
                                                             )
    datapoints = model_latency_metrics['Datapoints']
    if len(datapoints) > 0:
        datapoint = datapoints[0]
        # SampleCount is expected to match total_runs
        print('{} latency datapoints ready'.format(datapoint['SampleCount']))
        side_avg = datapoint['Average'] / microseconds_per_ms
        side_p50 = datapoint['ExtendedStatistics']['p50'] / microseconds_per_ms
        side_p90 = datapoint['ExtendedStatistics']['p90'] / microseconds_per_ms
        side_p95 = datapoint['ExtendedStatistics']['p95'] / microseconds_per_ms
        side_p100 = datapoint['ExtendedStatistics']['p100'] / microseconds_per_ms
        print(latency_header)
        print(latency_row.format(side_avg, side_p50, side_p90, side_p95, side_p100))

        cloudwatch_ready = True
    

In [None]:
#client_times

In [None]:
# Plot the client-side latency of every request in the order it was sent.
# The figure is sized to fit a notebook cell: an 80x60-inch canvas is scaled
# down so far on display that tick labels become unreadable.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(client_times)
ax.set_title('Client end-to-end latency per request')
ax.set_xlabel('Request number')
ax.set_ylabel('Latency (ms)')
plt.show()