In [1]:
!python -m pip install --upgrade pip 
!pip install -U awscli
!pip install sagemaker --upgrade

Collecting pip
  Obtaining dependency information for pip from https://files.pythonhosted.org/packages/e0/63/b428aaca15fcd98c39b07ca7149e24bc14205ad0f1c80ba2b01835aedde1/pip-23.3-py3-none-any.whl.metadata
  Downloading pip-23.3-py3-none-any.whl.metadata (3.5 kB)
Downloading pip-23.3-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.2.1
    Uninstalling pip-23.2.1:
      Successfully uninstalled pip-23.2.1
Successfully installed pip-23.3


In [2]:
%%sh

# Build the inference image from the Dockerfile in the current directory and push it to ECR.
# Abort on the first failing command so a broken build is never tagged and pushed.
set -e

# Name used for both the local image and the ECR repository
algorithm_name=sanitize-container-v2

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# `aws configure get` exits non-zero when the value is unset, hence the `|| true` under `set -e`.
region=$(aws configure get region || true)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
# The region is passed explicitly so these calls target the same registry as ${fullname}.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker into the ECR registry (the registry host, not the image name)
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
Sending build context to Docker daemon  870.9MB
Step 1/12 : FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:2.0.1-gpu-py310-cu118-ubuntu20.04-sagemaker
 ---> 63a81320a60a
Step 2/12 : WORKDIR /opt/program
 ---> Using cache
 ---> 9d98a8819bb7
Step 3/12 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> b4fe372b6f36
Step 4/12 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> d5e69c7173a8
Step 5/12 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> cb7a9bc5b5de
Step 6/12 : RUN apt -y update
 ---> Using cache
 ---> a608a69d626b
Step 7/12 : RUN apt install -y ffmpeg
 ---> Using cache
 ---> c7c237c00e47
Step 8/12 : RUN python -m pip install --upgrade pip
 ---> Using cache
 ---> 904d26a5c3aa
Step 9/12 : COPY ./requirements.txt ./requirements.txt
 ---> Using cache
 ---> eb9a8aea35c0
Step 10/12 : RUN pip install -r requirements.txt
 ---> Using cache
 ---> a89dbbbec906
Step 11/12 : COPY ./whisperDriver.py ./whisperDriver.py
 ---> Using cache
 -

In [3]:
import sagemaker
from time import gmtime, strftime
from datetime import datetime
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker import get_execution_role
from sagemaker.local import LocalSession
import boto3

# ========================================== CONSTANTS ===============================================

# IAM role of this notebook; passed as ExecutionRoleArn when the model is created.
role = get_execution_role()

boto_session = boto3.session.Session()
sm_session = sagemaker.session.Session()
sm_client = boto_session.client("sagemaker")
sm_runtime = boto_session.client("sagemaker-runtime")
region = boto_session.region_name

# instance_type = "ml.m5.xlarge"
instance_type = "ml.p3.2xlarge"

# Derive the image URI from the caller's account and the session region instead of
# hard-coding them, mirroring how the build cell composes the name it pushes to
# (<account>.dkr.ecr.<region>.amazonaws.com/sanitize-container-v2). No tag is given,
# so the registry's default tag is used, as before.
account_id = boto_session.client("sts").get_caller_identity()["Account"]
container_id = f"{account_id}.dkr.ecr.{region}.amazonaws.com/sanitize-container-v2"

# Default SageMaker bucket plus the key prefix under which inputs and outputs are stored.
s3_bucket = sm_session.default_bucket()
bucket_prefix = "sanitize-inference-store"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:

# ======================================== CREATING MODEL ============================================

model_name = "sanitize-model"

# The model consists of a single container: the custom image referenced by container_id.
container_definitions = [{"Image": container_id}]

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    Containers=container_definitions,
)

print(create_model_response)


{'ModelArn': 'arn:aws:sagemaker:us-east-2:150360654484:model/sanitize-model', 'ResponseMetadata': {'RequestId': 'b662bb01-4c48-4e62-a037-31d0c06e70de', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'b662bb01-4c48-4e62-a037-31d0c06e70de', 'content-type': 'application/x-amz-json-1.1', 'content-length': '76', 'date': 'Sun, 15 Oct 2023 20:32:12 GMT'}, 'RetryAttempts': 0}}


In [5]:
# =========================================== DEPLOYING ==============================================
deployment_name = "sanitize-endpoint-test-v2"

# The endpoint configuration is named after the model; the same name is used for both calls below.
endpoint_config_name = model_name + "-config-v1"

# One variant serving the model on a single instance of the configured type.
production_variant = {
    "ModelName": model_name,
    "VariantName": "variant1",
    "InstanceType": instance_type,
    "InitialInstanceCount": 1,
}

# Asynchronous inference: results are written under the output prefix of the bucket.
async_inference_config = {
    "OutputConfig": {"S3OutputPath": f"s3://{s3_bucket}/{bucket_prefix}/output"},
}

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
    AsyncInferenceConfig=async_inference_config,
)

create_endpoint_response = sm_client.create_endpoint(
    EndpointName=deployment_name,
    EndpointConfigName=endpoint_config_name,
)


# ======================================= Waiting until Ready ======================================

# Block on the boto3 "endpoint_in_service" waiter, then report the status that describe_endpoint returns.
waiter = sm_client.get_waiter("endpoint_in_service")
print("Waiting for endpoint to create...")
waiter.wait(EndpointName=deployment_name)

resp = sm_client.describe_endpoint(EndpointName=deployment_name)
print(f"Endpoint Status: {resp['EndpointStatus']}")



Waiting for endpoint to create...
Endpoint Status: InService


In [6]:
#### ADD PERMISSIONS TO REGISTER SCALABLE TARGET FOR CURR NOTEBOOK

# Application Auto Scaling client (the service that scales SageMaker endpoint variants, among others).
client_v2 = boto3.client("application-autoscaling")

# Application Auto Scaling addresses an endpoint variant as "endpoint/<name>/variant/<variant>".
resource_id = f"endpoint/{deployment_name}/variant/variant1"

# Allow the asynchronous endpoint to scale between zero and two instances.
scalable_target = {
    "ServiceNamespace": "sagemaker",
    "ResourceId": resource_id,
    "ScalableDimension": "sagemaker:variant:DesiredInstanceCount",
    "MinCapacity": 0,
    "MaxCapacity": 2,
}
response = client_v2.register_scalable_target(**scalable_target)

In [12]:
# =================================== Scale-out =============================================

# Step-scaling configuration: each alarm breach adds one instance to the variant.
scale_out_step_config = {
    "AdjustmentType": "ChangeInCapacity",  # ScalingAdjustment is an instance delta, not a percentage
    "MetricAggregationType": "Average",  # aggregation applied to the CloudWatch metric
    "Cooldown": 300,  # seconds to wait for a previous scaling activity to take effect
    "StepAdjustments": [
        {"MetricIntervalLowerBound": 0, "ScalingAdjustment": 1},
    ],
}

response = client_v2.put_scaling_policy(
    PolicyName=f"scaleoutpolicy-{deployment_name}",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,  # the endpoint variant registered as scalable target
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="StepScaling",
    StepScalingPolicyConfiguration=scale_out_step_config,
)

cw_client = boto3.client("cloudwatch")
step_scaling_policy_arn = response["PolicyARN"]

# Alarm that triggers the scale-out policy: HasBacklogWithoutCapacity averaged over one
# 60-second period is >= 1 for this endpoint.
response = cw_client.put_metric_alarm(
    AlarmName=f"step_scaling-out_policy{deployment_name}",
    Namespace="AWS/SageMaker",
    MetricName="HasBacklogWithoutCapacity",
    Dimensions=[{"Name": "EndpointName", "Value": deployment_name}],
    Statistic="Average",
    Period=60,
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=1,
    ComparisonOperator="GreaterThanOrEqualToThreshold",
    TreatMissingData="missing",
    AlarmActions=[step_scaling_policy_arn],
)

# =================================== Scale-in =============================================

# Step-scaling configuration: each alarm breach removes one instance, so the variant
# can shrink towards zero once there are no further requests to process.
scale_in_step_config = {
    "AdjustmentType": "ChangeInCapacity",  # ScalingAdjustment is an instance delta, not a percentage
    "MetricAggregationType": "Average",  # aggregation applied to the CloudWatch metric
    "Cooldown": 15,  # seconds to wait for a previous scaling activity to take effect
    "StepAdjustments": [
        {"MetricIntervalUpperBound": 0, "ScalingAdjustment": -1},
    ],
}

response_scalein = client_v2.put_scaling_policy(
    PolicyName=f"scaleinpolicy-{deployment_name}",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,  # the endpoint variant registered as scalable target
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="StepScaling",
    StepScalingPolicyConfiguration=scale_in_step_config,
)

stepin_scaling_policy_arn = response_scalein["PolicyARN"]

# Alarm that triggers the scale-in policy: ApproximateBacklogSizePerInstance averaged over
# one 60-second period is <= 0.5 for this endpoint.
response = cw_client.put_metric_alarm(
    AlarmName=f"step_scale-in_policy-{deployment_name}",
    Namespace="AWS/SageMaker",
    MetricName="ApproximateBacklogSizePerInstance",
    Dimensions=[{"Name": "EndpointName", "Value": deployment_name}],
    Statistic="Average",
    Period=60,
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=0.5,
    ComparisonOperator="LessThanOrEqualToThreshold",
    TreatMissingData="missing",
    AlarmActions=[stepin_scaling_policy_arn],
)


In [23]:
import os
import uuid
import json

def upload_file(input_location, content_type="video/mp4"):
    """Upload a local media file and a JSON request document describing it to S3.

    The media file is uploaded under ``<bucket_prefix>/input`` in the session's default
    bucket. A JSON document is then uploaded next to it under a fresh UUID name; it holds
    the keys ``file_uri``, ``output_target``, ``filename`` and ``output_name``.

    Args:
        input_location: Local path of the file to upload. It is also stored verbatim
            as ``filename`` in the request document.
        content_type: Value stored as the S3 ``ContentType`` of the uploaded file.
            Defaults to ``"video/mp4"``, the value that was previously hard-coded.

    Returns:
        dict: ``{"input": <S3 URI of the JSON request document>,
        "output": <output_target>/<output_name>}``.
    """
    prefix = f"{bucket_prefix}/input"
    output_id = str(uuid.uuid4())
    bucket = sm_session.default_bucket()

    file_location = sm_session.upload_data(
        input_location,
        bucket=bucket,
        key_prefix=prefix,
        extra_args={"ContentType": content_type},
    )

    data = {
        "file_uri": file_location,
        "output_target": f"s3://{s3_bucket}/{bucket_prefix}/output",
        "filename": input_location,
        "output_name": output_id + ".json",
    }

    # The request document shares its UUID with the expected output object name.
    input_uri = sm_session.upload_string_as_file_body(
        json.dumps(data),
        bucket=bucket,
        key=prefix + "/" + output_id + ".json",
    )

    return {"input": input_uri, "output": data["output_target"] + "/" + data["output_name"]}

# Local file used as the test input.
test_file = "30_min.mp4"

# Upload the file and its request document; the returned S3 URIs are used by later cells.
io_locations = upload_file(input_location=test_file)

print("file location", io_locations)

file location {'input': 's3://sagemaker-us-east-2-150360654484/sanitize-inference-store/input/256d1554-15f1-4f31-aee4-20cfd6965f8c.json', 'output': 's3://sagemaker-us-east-2-150360654484/sanitize-inference-store/output/256d1554-15f1-4f31-aee4-20cfd6965f8c.json'}


In [24]:
deployment_name = "sanitize-endpoint-test-v2"

# Submit the uploaded request document to the asynchronous endpoint.
request_uri = io_locations["input"]
response = sm_runtime.invoke_endpoint_async(
    EndpointName=deployment_name,
    InputLocation=request_uri,
)

# Location computed by upload_file() for the result object.
output_location = io_locations["output"]
print(f"OutputLocation: {output_location}")

OutputLocation: s3://sagemaker-us-east-2-150360654484/sanitize-inference-store/output/256d1554-15f1-4f31-aee4-20cfd6965f8c.json


In [25]:
import time
from datetime import datetime

from sagemaker.s3 import S3Downloader

def get_output(output_location, timeout=None, poll_interval=2):
    """Poll S3 until something can be downloaded from ``output_location``.

    Each attempt calls ``S3Downloader.download`` into the current directory and succeeds
    as soon as the download result is non-empty.

    Args:
        output_location: S3 URI to download from.
        timeout: Maximum number of seconds to keep polling. ``None`` (the default)
            polls indefinitely, which is the previous behaviour.
        poll_interval: Seconds to sleep between attempts (default 2, as before).

    Returns:
        list: ``[first element of the download result, seconds waited]``.

    Raises:
        TimeoutError: If ``timeout`` is set and nothing was downloaded in time.
    """
    print("waiting for output...")
    # Monotonic clock: the elapsed time is unaffected by wall-clock adjustments.
    start = time.monotonic()
    while True:
        output = S3Downloader.download(output_location, "./", sagemaker_session=sm_session)
        elapsed = time.monotonic() - start
        if len(output) > 0:
            return [output[0], elapsed]
        # Without a bound this loop never ends if the output object is never written.
        if timeout is not None and elapsed >= timeout:
            raise TimeoutError(
                f"No output at {output_location} after {elapsed:.0f} seconds"
            )
        time.sleep(poll_interval)


# Wait for the inference result; `output` holds the downloaded item and the seconds waited.
output = get_output(output_location=output_location)
print(f"Output: {output}")

waiting for output...
Output: [['./256d1554-15f1-4f31-aee4-20cfd6965f8c.json'], 530.769032]


In [None]:
import sagemaker
from sagemaker.s3 import S3Downloader, S3Uploader

# Scratch cell: re-creates the SageMaker session; the S3 transfer snippets below are kept
# commented out for manual, ad-hoc use.
sm_session = sagemaker.session.Session()

# Manual download of an input video into the current directory:
# S3Downloader.download("s3://sagemaker-us-east-2-150360654484/sanitize-inference-store/input/7_mins.mp4", "./", sagemaker_session=sm_session)
# Manual upload of a local file to the output prefix (uses s3_bucket / bucket_prefix, which this cell does not define):
# S3Uploader.upload("requirements.txt", f"s3://{s3_bucket}/{bucket_prefix}/output", sagemaker_session=sm_session )
    

In [49]:
import json

# `output` is the [downloaded item, seconds waited] pair returned by get_output().
# The recorded run shows the downloaded item as a one-element list of paths, while
# get_output() as written returns the first element of the download result, which may
# be a plain path string. Accept both so the cell does not index into a string.
downloaded = output[0]
output_file = downloaded if isinstance(downloaded, str) else downloaded[0]

# Read as UTF-8 explicitly instead of relying on the locale's default encoding.
with open(output_file, encoding="utf-8") as json_file:
    data = json.load(json_file)

print (data["time"])
print (data["result"]["word_segments"])

67.990456


In [40]:
# Report the current and desired instance counts of the endpoint's first production variant.
endpoint_description = sm_client.describe_endpoint(EndpointName=deployment_name)
response = endpoint_description["ProductionVariants"][0]

print("currCount:", response["CurrentInstanceCount"])
print("desiredCount:", response["DesiredInstanceCount"])

currCount: 0
desiredCount: 0
