In [1]:
import json
import os
import time
import time
import urllib
import urllib.parse
from time import gmtime, strftime

import boto3
import cv2
import matplotlib.pyplot as plt
import numpy as np
import sagemaker
from botocore.exceptions import ClientError
from sagemaker import ModelPackage
from sagemaker import get_execution_role, session
from sagemaker.async_inference import AsyncInferenceConfig

# Resolve the AWS region and the notebook's execution role, then build the
# SageMaker control-plane client used for endpoint management further down.
region = boto3.Session().region_name
role = get_execution_role()
sm_client = boto3.client(service_name="sagemaker", region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# SageMaker SDK session pinned to an explicit default S3 bucket.
sagemaker_session = sagemaker.Session(
    default_bucket="sagemaker-us-east-1-470086202700",
)

In [None]:
# ARN of the model package that the following cells wrap and deploy.
model_package_arn = (
    "arn:aws:sagemaker:us-east-1:470086202700:model-package/tomato-phenome-nonprod/7"
)

In [3]:
# Wrap the model package so it can be deployed with the execution role and
# the session configured above.
model = ModelPackage(
    model_package_arn=model_package_arn,
    role=role,
    sagemaker_session=sagemaker_session,
)

NameError: name 'model_package_arn' is not defined

In [4]:
# Asynchronous-inference settings: the S3 prefix for results and the number of
# concurrent invocations allowed per instance.
async_config = AsyncInferenceConfig(
    max_concurrent_invocations_per_instance=1,
    output_path="s3://sagemaker-us-east-1-470086202700/async_output/",
)

In [None]:
 endpoint_name = 'ballon-segmentation-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

In [None]:
# Deploy the model package as an asynchronous endpoint on a single instance.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.p3.2xlarge",
    initial_instance_count=1,
    async_inference_config=async_config,
)

In [None]:
# Block until the "endpoint_in_service" waiter succeeds, then fetch and show
# the endpoint description.
waiter = sm_client.get_waiter("endpoint_in_service")
print("Waiting for endpoint to create...")
waiter.wait(EndpointName=endpoint_name)
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)

print(f"Endpoint Status: {resp['EndpointStatus']}")
# Bare last expression so the notebook renders the full description. The
# previous `print(...), resp` built a tuple and displayed `(None, {...})`.
resp

In [None]:
## Autoscaling the endpoint

In [None]:
# Application Auto Scaling is the AWS service that manages scaling for
# SageMaker endpoint variants (amongst other services). The region is passed
# explicitly, matching how `sm_client` is created.
client = boto3.client("application-autoscaling", region_name=region)

# Application Auto Scaling references an endpoint variant with this resource-id format.
# NOTE(review): "AllTraffic" is presumably the variant name created by
# model.deploy() — confirm against the endpoint description.
resource_id = "endpoint/" + endpoint_name + "/variant/" + "AllTraffic"

# Register the variant as a scalable target that may scale down to zero instances.
response = client.register_scalable_target(
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    MinCapacity=0,
    MaxCapacity=2,
)

# NOTE(review): with MinCapacity=0, the AWS async-inference autoscaling guide
# describes an additional step-scaling policy on the HasBacklogWithoutCapacity
# metric for waking an endpoint that has scaled to zero; with only the
# target-tracking policy below, scale-out from zero appears to wait until the
# backlog exceeds TargetValue — confirm whether that is acceptable here.
response = client.put_scaling_policy(
    PolicyName="Invocations-ScalingPolicy",
    ServiceNamespace="sagemaker",  # The namespace of the AWS service that provides the resource.
    ResourceId=resource_id,  # The endpoint variant registered above
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",  # Scale by instance count
    PolicyType="TargetTrackingScaling",  # 'StepScaling'|'TargetTrackingScaling'
    TargetTrackingScalingPolicyConfiguration={
        # Target value for the tracked metric, which is the
        # ApproximateBacklogSizePerInstance metric specified below.
        "TargetValue": 5.0,
        "CustomizedMetricSpecification": {
            "MetricName": "ApproximateBacklogSizePerInstance",
            "Namespace": "AWS/SageMaker",
            "Dimensions": [{"Name": "EndpointName", "Value": endpoint_name}],
            "Statistic": "Average",
        },
        # Cooldowns keep the policy from launching or terminating additional
        # instances before the effects of previous activities are visible.
        # ScaleInCooldown - seconds after a scale-in activity completes before
        # another scale-in activity can start.
        "ScaleInCooldown": 600,
        # ScaleOutCooldown - seconds after a scale-out activity completes
        # before another scale-out activity can start.
        "ScaleOutCooldown": 300,
        # 'DisableScaleIn': True|False - indicates whether scale in by the
        # target tracking policy is disabled. If true, the policy won't remove
        # capacity from the scalable resource.
    },
)

In [None]:
# Load the test image with OpenCV.
image_path = "test_balloon.jpg"
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would only surface later as a confusing TypeError when `img` is indexed.
if img is None:
    raise FileNotFoundError(f"cv2.imread could not read an image from {image_path!r}")

In [None]:
# Reverse the last axis (channel order) of the OpenCV-loaded image for display.
plt.imshow(img[..., ::-1])

In [None]:
# Request payload: the image as nested lists plus a batch size, serialized to JSON.
data = dict(images=img.tolist(), batch_size=1)
json_body = json.dumps(data)

In [None]:
# Size of the UTF-8 encoded payload, expressed in units of 1024**2 bytes.
byte_size = len(json_body.encode("utf-8"))
size_in_mb = byte_size / 1024 ** 2
size_in_mb

In [None]:
# S3 client used to stage the request payload.
s3 = boto3.Session().client("s3")

In [None]:
bucket_name = "sagemaker-us-east-1-470086202700"
file_path = "async_inference/segm_input_data.json"
# Stage the serialized JSON request in S3 so it can be passed to the endpoint by location.
s3.put_object(Body=json_body, Bucket=bucket_name, Key=file_path)

In [None]:
# Runtime client used to invoke the endpoint.
sm_runtime = boto3.Session().client("sagemaker-runtime")

In [None]:
# S3 URI of the request payload uploaded above.
input_location = "s3://{}/{}".format(bucket_name, file_path)

In [None]:
# Submit the staged payload to the asynchronous endpoint.
response = sm_runtime.invoke_endpoint_async(
    InputLocation=input_location,
    EndpointName=endpoint_name,
)

In [None]:
# Location reported by the invocation response; polled for the result below.
output_location = response["OutputLocation"]
output_location

In [None]:
def get_output(output_location, poll_interval=2, timeout=None):
    """Poll S3 until the result object exists, then return it parsed as JSON.

    Args:
        output_location: ``s3://bucket/key`` URI of the expected result object.
        poll_interval: Seconds to sleep between attempts while the object does
            not exist yet.
        timeout: Maximum number of seconds to keep polling, or ``None`` to
            poll indefinitely (the default).

    Returns:
        The object's content decoded with ``json.loads``.

    Raises:
        TimeoutError: ``timeout`` was given and the object still did not exist
            once it elapsed.
        ClientError: Any S3 error other than ``NoSuchKey`` is re-raised.

    NOTE(review): if the invocation fails, presumably nothing is ever written
    to ``output_location``, so pass a ``timeout`` to avoid polling forever —
    confirm against the endpoint's failure-notification settings.
    """
    # `import urllib` alone does not load the `parse` submodule; import it
    # explicitly rather than relying on another package having done so.
    from urllib.parse import urlparse

    output_url = urlparse(output_location)
    bucket = output_url.netloc
    key = output_url.path[1:]  # drop the leading "/" of the URL path
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        try:
            file_content = sagemaker_session.read_s3_file(bucket=bucket, key_prefix=key)
        except ClientError as e:
            # Only "object not there yet" is retried; everything else propagates.
            if e.response["Error"]["Code"] != "NoSuchKey":
                raise
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"no output at {output_location} after {timeout} seconds"
                ) from e
            print("waiting for output...")
            time.sleep(poll_interval)
        else:
            return json.loads(file_content)

In [6]:
def rle_decode(rle, shape):
    """Expand a run-length encoding into a dense array of the given shape.

    `rle` is reshaped into rows of two entries; the first entry of each row is
    used as the repeat count and the second as the value to repeat. The
    expanded values are then reshaped to `shape`.
    """
    pairs = np.array(rle).reshape(-1, 2)
    counts, values = pairs[:, 0], pairs[:, 1]
    return np.repeat(values, counts).reshape(shape)

In [9]:
def visualize_masks_and_boxes(image, masks, boxes, color=(255, 0, 0), alpha=0.5):
    """Show `image` with masked pixels tinted and bounding boxes outlined.

    Args:
        image: Array to draw on; copies are modified, the input is not.
        masks: Array indexed as ``masks[i, :, :]``; pixels where a mask is > 0
            are set to `color` on the tinted layer.
        boxes: Array indexed as ``boxes[i, :]`` and unpacked as (x1, y1, x2, y2).
        color: Value assigned to masked pixels.
        alpha: Blend weight of the tinted layer; the layer carrying the boxes
            gets ``1 - alpha``.
    """
    tinted = image.copy()
    canvas = image.copy()
    box_color = (255, 255, 0)

    for idx in range(masks.shape[0]):
        region, corners = masks[idx, :, :], boxes[idx, :]
        tinted[region > 0] = color
        left, top, right, bottom = corners
        # Outline the bounding box on the un-tinted layer.
        cv2.rectangle(canvas, (int(left), int(top)), (int(right), int(bottom)), box_color, 4)

    # Blend the tinted layer into the boxed layer in place.
    cv2.addWeighted(tinted, alpha, canvas, 1 - alpha, 0, canvas)
    rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 10))
    plt.imshow(rgb)
    plt.axis('off')
    plt.show()

In [7]:
# Wait for the asynchronous result and load it.
results = get_output(output_location)

NameError: name 'get_output' is not defined

In [8]:
# Inspect which fields the response contains.
results.keys()

NameError: name 'results' is not defined

In [None]:
# Decode every run-length-encoded mask to the shape reported in the response
# and collect them into one array; the boxes are converted to an array as well.
rle_masks = results["masks"]
masks_shape = results["mask_shape"]
masks = np.array([rle_decode(encoded, masks_shape) for encoded in rle_masks])
boxes = np.array(results["boxes"])

In [None]:
# Overlay the decoded masks and boxes on the input image.
visualize_masks_and_boxes(img, masks, boxes)

In [None]:
# Clean-up: remove the endpoint variant from Application Auto Scaling.
response = client.deregister_scalable_target(
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    ServiceNamespace="sagemaker",
)

In [None]:
# Clean-up: delete the endpoint and the endpoint configuration behind it.
# Deleting only the endpoint leaves its configuration in the account.
# The configuration name is looked up first, while the endpoint still exists.
endpoint_config_name = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointConfigName"]
sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
# NOTE(review): the SageMaker model created by model.deploy() is not removed
# here — delete it too if it is no longer needed.