## 2.1 Import Python Libraries

In [6]:
import boto3, cv2, time, json
import base64
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import numpy as np

from sagemaker.mxnet import MXNetPredictor
from sagemaker.deserializers import JSONDeserializer

## 2.2 Check if Endpoint creation is successful and create the predictor

In [7]:
sm_client = boto3.client(service_name="sagemaker")

# Name of the endpoint created by the 2_DeployEndpoint.ipynb notebook
# (hard-coded here; it must match the name used at deployment time).
ENDPOINT_NAME = 'yolov8-optimized-images-serverless-endpoint'
print(f'Endpoint Name: {ENDPOINT_NAME}')

# Upper bound on how long to wait, and how often to poll.
ENDPOINT_WAIT_TIMEOUT_SECONDS = 1800
ENDPOINT_POLL_INTERVAL_SECONDS = 5

# Poll only this endpoint. Listing every endpoint in the account is paginated
# (the target may not be on the first page) and prints unrelated statuses.
# describe_endpoint raises a ClientError if the endpoint does not exist, so a
# wrong name fails immediately instead of looping forever.
endpoint_created = False
wait_deadline = time.time() + ENDPOINT_WAIT_TIMEOUT_SECONDS
while not endpoint_created:
    response = sm_client.describe_endpoint(EndpointName=ENDPOINT_NAME)
    endpoint_status = response['EndpointStatus']
    print(f"Endpoint Status = {endpoint_status}")

    if endpoint_status == 'InService':
        endpoint_created = True
    elif endpoint_status == 'Failed':
        # A failed endpoint never becomes InService; surface the reason.
        raise RuntimeError(
            f"Endpoint {ENDPOINT_NAME} failed to deploy: "
            f"{response.get('FailureReason', 'no failure reason reported')}"
        )
    elif time.time() >= wait_deadline:
        raise TimeoutError(
            f"Endpoint {ENDPOINT_NAME} was not InService after "
            f"{ENDPOINT_WAIT_TIMEOUT_SECONDS} seconds (last status: {endpoint_status})"
        )
    else:
        time.sleep(ENDPOINT_POLL_INTERVAL_SECONDS)

Endpoint Name: yolov8-optimized-images-serverless-endpoint
Endpoint Status = InService


In [8]:
# Attach a predictor to the already-deployed endpoint; responses are
# deserialized from JSON. No serializer is passed, so the SDK default is used.
predictor = MXNetPredictor(
    endpoint_name=ENDPOINT_NAME,
    deserializer=JSONDeserializer(),
)

## 2.3 Run Inference and Generate output results

In [9]:
# Time the full round trip: image load, JPEG encode, request, and response.
infer_start_time = time.time()

image_path = './data/db6d38a1-4fae2f3d.jpg'
orig_image = cv2.imread(image_path)
if orig_image is None:
    # cv2.imread returns None rather than raising when the file is missing
    # or cannot be decoded.
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Re-encode as JPEG and check the success flag that imencode returns.
encode_ok, jpg_buffer = cv2.imencode('.jpg', orig_image)
if not encode_ok:
    raise ValueError(f"Could not encode image as JPEG: {image_path}")

# Request body: {"image": [{"b64": <base64-encoded JPEG>}]}
b64_jpg = base64.b64encode(jpg_buffer.tobytes()).decode('utf-8')
# NOTE(review): payload is already a JSON string; if the predictor's
# serializer also JSON-encodes its input, the body is encoded twice --
# confirm against the endpoint's input handler.
payload = json.dumps({"image": [{"b64": b64_jpg}]})

# NOTE(review): a recorded run of this cell failed with an invocation timeout;
# possibly a serverless cold start -- check the endpoint's CloudWatch logs.
result = predictor.predict(payload)

# The response carries the output image under the "image" key.
output_image = result["image"]

infer_end_time = time.time()

print(f"Inference Time = {infer_end_time - infer_start_time:0.4f} seconds")



ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (0) from model with message "Your invocation timed out while waiting for a response from model container. Review the latency metrics in Amazon CloudWatch, resolve the issue, and try again.". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/yolov8-optimized-images-serverless-endpoint in account 141178960309 for more information.

In [None]:
# Local sanity check: decode the request payload back into an image array.
# The parsed payload gets its own name so `payload` stays the JSON string that
# was sent; overwriting it made this cell fail when run a second time.
decoded_payload = json.loads(payload)
b64_image = decoded_payload['image'][0]["b64"]
img_bytes = base64.b64decode(b64_image)
# View the JPEG bytes as a uint8 buffer and decode it to a colour image.
jpg_as_np = np.frombuffer(img_bytes, dtype=np.uint8)
img = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR)

In [None]:
# Convert the base64 string to bytes
image_bytes = base64.b64decode(output_image)

# Load the image from the bytes using PIL
image = Image.open(BytesIO(image_bytes))

# Display the image using Matplotlib
plt.figure(figsize=(8, 8))
plt.imshow(image)
plt.axis('off')
plt.show()


In [None]:
# Time the full round trip: image load, JPEG encode, request, and response.
infer_start_time = time.time()

image_path = './data/cf3b0008-4cb31ab3.jpg'
orig_image = cv2.imread(image_path)
if orig_image is None:
    # cv2.imread returns None rather than raising when the file is missing
    # or cannot be decoded.
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Re-encode as JPEG and check the success flag that imencode returns.
encode_ok, jpg_buffer = cv2.imencode('.jpg', orig_image)
if not encode_ok:
    raise ValueError(f"Could not encode image as JPEG: {image_path}")

# Raw JPEG bytes are not JSON-serializable, and the predictor is created
# without a custom serializer, so send the same base64-in-JSON body used by the
# earlier inference cell: {"image": [{"b64": <base64-encoded JPEG>}]}
b64_jpg = base64.b64encode(jpg_buffer.tobytes()).decode('utf-8')
payload = json.dumps({"image": [{"b64": b64_jpg}]})
result = predictor.predict(payload)

# The response carries the output image under the "image" key.
output_image = result["image"]

infer_end_time = time.time()

print(f"Inference Time = {infer_end_time - infer_start_time:0.4f} seconds")

In [None]:
# Convert the base64 string to bytes
image_bytes = base64.b64decode(output_image)

# Load the image from the bytes using PIL
image = Image.open(BytesIO(image_bytes))

# Display the image using Matplotlib
plt.figure(figsize=(8, 8))
plt.imshow(image)
plt.axis('off')
plt.show()

## 2.4 Clean up by removing the Endpoint, Endpoint Config and Model

In [10]:
# Resolve the endpoint config actually attached to the endpoint, rather than
# assuming it has the same name as the endpoint.
endpoint_description = sm_client.describe_endpoint(EndpointName=ENDPOINT_NAME)
endpoint_config_name = endpoint_description['EndpointConfigName']

# Fetch the config before deleting anything: its production variants name the
# models that have to be removed afterwards.
response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
print(response)

# Delete Endpoint
sm_client.delete_endpoint(EndpointName=ENDPOINT_NAME)

# Delete Endpoint Configuration
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)

# Delete every model referenced by the config's production variants
for prod_var in response['ProductionVariants']:
    model_name = prod_var['ModelName']
    sm_client.delete_model(ModelName=model_name)

{'EndpointConfigName': 'yolov8-optimized-images-serverless-endpoint', 'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:141178960309:endpoint-config/yolov8-optimized-images-serverless-endpoint', 'ProductionVariants': [{'VariantName': 'AllTraffic', 'ModelName': 'mxnet-inference-2024-05-23-02-46-29-953', 'InitialVariantWeight': 1.0, 'ServerlessConfig': {'MemorySizeInMB': 3072, 'MaxConcurrency': 1}, 'VolumeSizeInGB': 5}], 'CreationTime': datetime.datetime(2024, 5, 23, 2, 46, 30, 802000, tzinfo=tzlocal()), 'EnableNetworkIsolation': False, 'ResponseMetadata': {'RequestId': 'a40f683a-9560-4d10-81af-6f557ba8767d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a40f683a-9560-4d10-81af-6f557ba8767d', 'content-type': 'application/x-amz-json-1.1', 'content-length': '468', 'date': 'Thu, 23 May 2024 02:52:16 GMT'}, 'RetryAttempts': 0}}
