In [None]:
# Show the CloudFormation template that is used below to create the stack,
# rendered with syntax highlighting by the pygmentize command-line tool.
!pygmentize endpoint-one-model.yml

In [None]:
import boto3

# AWS clients shared by the cells below: CloudFormation for stack and
# change-set operations, SageMaker for training-job and endpoint lookups.
cf = boto3.client(service_name='cloudformation')
sm = boto3.client(service_name='sagemaker')

## Create one-model endpoint

In [None]:
# Update this with your own model name
training_job = 'tensorflow-training-2021-05-28-14-25-57-394'

# Describe the training job once and reuse the response for both the model
# artifact location and the role ARN.
job = sm.describe_training_job(TrainingJobName=training_job)
model_artifacts = job['ModelArtifacts']
model_data_url = model_artifacts['S3ModelArtifacts']
role_arn = job['RoleArn']

# Inference container image. Available images are listed at:
# https://github.com/aws/deep-learning-containers/blob/master/available_images.md
# NOTE(review): the URI is pinned to eu-west-1 -- confirm it matches the region
# the boto3 clients are configured for.
container_image = '763104351884.dkr.ecr.eu-west-1.amazonaws.com/tensorflow-inference:2.1.0-cpu-py36-ubuntu18.04'

In [None]:
import time

# UTC timestamp with one-second resolution; it is appended to the stack name
# here and to the model names in later cells.
utc_now = time.gmtime()
timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", utc_now)

stack_name = ''.join(('endpoint-one-model-', timestamp))
print(stack_name)

# Load the one-model CloudFormation template, then create the stack from it.
with open('endpoint-one-model.yml', 'r') as template_file:
    template_body = template_file.read()

# Template parameter name -> value. The model name is the training job name
# with the run timestamp appended.
stack_parameter_values = {
    "ModelName":      training_job + '-' + timestamp,
    "ContainerImage": container_image,
    "ModelDataUrl":   model_data_url,
    "RoleArn":        role_arn,
}
stack_parameters = [
    {"ParameterKey": key, "ParameterValue": value}
    for key, value in stack_parameter_values.items()
]

response = cf.create_stack(
    StackName=stack_name,
    TemplateBody=template_body,
    Parameters=stack_parameters,
)
print(response)

In [None]:
# Block until CloudFormation reports that stack creation has completed.
cf.get_waiter('stack_create_complete').wait(StackName=stack_name)

In [None]:
# List the stack's events, printing the resource type and status of each one.
response = cf.describe_stack_events(StackName=stack_name)

stack_events = response['StackEvents']
for event in stack_events:
    print('%s %s' % (event['ResourceType'], event['ResourceStatus']))

In [None]:
# Fetch the stack description, report its status, and read the endpoint name
# from the stack outputs.
response = cf.describe_stacks(StackName=stack_name)
stack = response['Stacks'][0]

print(stack['StackStatus'])

# 'Outputs' may be absent from the description when the stack has produced no
# outputs, so fall back to an empty list instead of raising a bare KeyError.
stack_outputs = {o['OutputKey']: o['OutputValue'] for o in stack.get('Outputs', [])}

# Fail loudly here rather than leaving endpoint_name undefined -- or silently
# reusing a stale value left in the kernel by an earlier run of this cell.
if 'EndpointName' not in stack_outputs:
    raise RuntimeError(
        "Stack %s has no 'EndpointName' output (status: %s)"
        % (stack_name, stack['StackStatus'])
    )
endpoint_name = stack_outputs['EndpointName']

print(endpoint_name)

## Apply change set to update instance count

In [None]:
# Prepare (without applying yet) a change set that reuses the current template,
# sets InstanceCount to 2, and keeps the previous value of the parameters below.
unchanged_keys = ("ModelName", "ContainerImage", "ModelDataUrl", "RoleArn")

change_set_parameters = [{"ParameterKey": "InstanceCount", "ParameterValue": "2"}]
change_set_parameters.extend(
    {"ParameterKey": key, "UsePreviousValue": True} for key in unchanged_keys
)

response = cf.create_change_set(
    StackName=stack_name,
    ChangeSetName='add-instance',
    UsePreviousTemplate=True,
    Parameters=change_set_parameters,
)

response

In [None]:
# Wait for CloudFormation to finish creating the 'add-instance' change set.
change_set_waiter = cf.get_waiter('change_set_create_complete')
change_set_waiter.wait(ChangeSetName='add-instance', StackName=stack_name)

In [None]:
# Inspect the changes listed in the 'add-instance' change set before applying it.
change_set_ref = {'StackName': stack_name, 'ChangeSetName': 'add-instance'}
response = cf.describe_change_set(**change_set_ref)

response['Changes']

In [None]:
# Apply the 'add-instance' change set to the stack.
change_set_ref = dict(StackName=stack_name, ChangeSetName='add-instance')
response = cf.execute_change_set(**change_set_ref)

response

In [None]:
# Show the stack status right after the change set has been submitted.
response = cf.describe_stacks(StackName=stack_name)

current_stack = response['Stacks'][0]
print(current_stack['StackStatus'])

In [None]:
# Print one "<resource type> <resource status>" line per stack event.
response = cf.describe_stack_events(StackName=stack_name)

event_lines = [
    '%s %s' % (event['ResourceType'], event['ResourceStatus'])
    for event in response['StackEvents']
]
for line in event_lines:
    print(line)

In [None]:
# Block until the stack update started by the change set has completed.
cf.get_waiter('stack_update_complete').wait(StackName=stack_name)

In [None]:
# Check the instance count reported for the endpoint's first production variant.
response = sm.describe_endpoint(EndpointName=endpoint_name)

first_variant = response['ProductionVariants'][0]
first_variant['CurrentInstanceCount']

## Apply change set to add second production variant to endpoint

In [None]:
# Show the two-model CloudFormation template (applied below through a change
# set), rendered with syntax highlighting by the pygmentize command-line tool.
!pygmentize endpoint-two-models.yml

In [None]:
# Update this with your own model name
training_job_2 = 'tensorflow-training-2021-05-28-14-40-01-442'

# Only the model artifact location is read from the second training job.
job_2 = sm.describe_training_job(TrainingJobName=training_job_2)
model_artifacts_2 = job_2['ModelArtifacts']
model_data_url_2 = model_artifacts_2['S3ModelArtifacts']

In [None]:
# Build a change set that replaces the stack's template with the two-model
# template and supplies the parameters describing the second model.
with open('endpoint-two-models.yml', 'r') as template_file:
    two_models_template = template_file.read()

# Parameters carried over unchanged from the current stack.
carried_over = [
    {"ParameterKey": key, "UsePreviousValue": True}
    for key in ("ModelName", "ModelDataUrl", "ContainerImage", "RoleArn")
]
# New parameters for the second model; its name gets the same timestamp suffix.
second_model = [
    {"ParameterKey": "ModelName2",    "ParameterValue": training_job_2 + '-' + timestamp},
    {"ParameterKey": "ModelDataUrl2", "ParameterValue": model_data_url_2},
]

# NOTE(review): InstanceCount is not passed here. If the two-model template
# declares it, CloudFormation presumably falls back to the template default
# rather than the value set by the earlier change set -- confirm.
response = cf.create_change_set(
    StackName=stack_name,
    ChangeSetName='add-model',
    TemplateBody=two_models_template,
    Parameters=carried_over + second_model,
)

response

In [None]:
# Wait for CloudFormation to finish creating the 'add-model' change set.
add_model_waiter = cf.get_waiter('change_set_create_complete')
add_model_waiter.wait(ChangeSetName='add-model', StackName=stack_name)

In [None]:
# Inspect the changes listed in the 'add-model' change set before applying it.
change_set_ref = {'StackName': stack_name, 'ChangeSetName': 'add-model'}
response = cf.describe_change_set(**change_set_ref)

response['Changes']

In [None]:
# Apply the 'add-model' change set to the stack.
change_set_ref = dict(StackName=stack_name, ChangeSetName='add-model')
response = cf.execute_change_set(**change_set_ref)

response

In [None]:
# Block until the stack update started by the 'add-model' change set has completed.
cf.get_waiter('stack_update_complete').wait(StackName=stack_name)

In [None]:
# Display the production variants the endpoint reports after the update.
response = sm.describe_endpoint(EndpointName=endpoint_name)

production_variants = response['ProductionVariants']
production_variants

## Create a CloudWatch alarm for model latency

In [None]:
cw = boto3.client('cloudwatch')

alarm_name = 'My_endpoint_latency'

# The alarm watches a single variant of the endpoint.
alarm_dimensions = [
    {'Name': 'EndpointName', 'Value': endpoint_name},
    {'Name': 'VariantName',  'Value': 'variant-2'},
]

# Create (or overwrite) the alarm: it fires when the average ModelLatency over
# one 60-second period exceeds 500000 microseconds, i.e. 500 ms.
alarm_settings = dict(
    AlarmName=alarm_name,
    AlarmDescription='Alarm when 1-minute average latency exceeds 500ms',
    Namespace='AWS/SageMaker',
    MetricName='ModelLatency',
    Dimensions=alarm_dimensions,
    Statistic='Average',
    Period=60,
    EvaluationPeriods=1,
    ComparisonOperator='GreaterThanThreshold',
    Threshold=500000.0,
    Unit='Microseconds',
)
response = cw.put_metric_alarm(**alarm_settings)

response

In [None]:
# Look up the ARN of the latency alarm; it is needed later as a CloudFormation
# rollback trigger.
response = cw.describe_alarms(AlarmNames=[alarm_name])

alarm_arn = next(
    (a['AlarmArn'] for a in response['MetricAlarms'] if a['AlarmName'] == alarm_name),
    None,
)

# Fail loudly here rather than leaving alarm_arn undefined -- or silently
# reusing a stale ARN left in the kernel by an earlier run of this cell.
if alarm_arn is None:
    raise RuntimeError("CloudWatch alarm %r was not found" % alarm_name)

print(alarm_arn)

## Canary deployment of second model

In [None]:
# Traffic percentages for the new model, in steps of ten: 10, 20, ..., 100.
weights = [10 * step for step in range(1, 11)]

print(weights)

In [None]:
# Shift traffic to the second variant step by step. Each step is a stack update
# that changes only the two variant weights, and the loop waits for the update
# to complete before moving on to the next weight.

# Parameters that keep their previous value at every step.
unchanged_parameters = [
    {"ParameterKey": key, "UsePreviousValue": True}
    for key in ("ModelName", "ModelDataUrl", "ContainerImage", "RoleArn",
                "ModelName2", "ModelDataUrl2")
]

# The latency alarm is registered as a rollback trigger for every update.
rollback_configuration = {
    'RollbackTriggers': [{'Arn': alarm_arn, 'Type': 'AWS::CloudWatch::Alarm'}],
    'MonitoringTimeInMinutes': 5,
}

waiter = cf.get_waiter('stack_update_complete')

for new_model_weight in weights:
    # The two weights always add up to 100.
    variant_weights = [
        {"ParameterKey": "VariantWeight",  "ParameterValue": str(100 - new_model_weight)},
        {"ParameterKey": "VariantWeight2", "ParameterValue": str(new_model_weight)},
    ]
    response = cf.update_stack(
        StackName=stack_name,
        UsePreviousTemplate=True,
        Parameters=unchanged_parameters + variant_weights,
        RollbackConfiguration=rollback_configuration,
    )
    waiter.wait(StackName=stack_name)
    print("Sending %d percent of traffic to new model" % new_model_weight)

In [None]:
# Clean up: request deletion of the CloudFormation stack created by this notebook.
cf.delete_stack(StackName=stack_name)

# The latency alarm was created directly through the CloudWatch API rather than
# by the stack, so deleting the stack leaves it behind; remove it explicitly.
cw.delete_alarms(AlarmNames=[alarm_name])