In [None]:
# Name of an existing SageMaker model to deploy.
# Replace this value with the name of a model in your own account.
model_name = "sagemaker-xgboost-2020-06-09-08-33-24-782"

In [None]:
# The endpoint serves a single production variant backed by `model_name`,
# starting with two ml.m5.large instances.
primary_variant = {
    'VariantName': 'variant-1',
    'ModelName': model_name,
    'InitialInstanceCount': 2,
    'InitialVariantWeight': 1,
    'InstanceType': 'ml.m5.large',
}

production_variants = [primary_variant]

In [None]:
import boto3, time, pprint

# SageMaker control-plane client, used below to create, describe and delete
# the endpoint configuration and the endpoint.
sm = boto3.client(service_name='sagemaker')

In [None]:
# A UTC timestamp suffix gives each run its own endpoint configuration name.
epc_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
endpoint_config_name = 'xgboost-one-model-epc-' + epc_timestamp

In [None]:
# Tags attached to the endpoint configuration: its own name and the algorithm.
endpoint_config_tags = [
    {'Key': 'Name', 'Value': endpoint_config_name},
    {'Key': 'Algorithm', 'Value': 'xgboost'},
]

# Create the endpoint configuration from the variants defined earlier and
# print the API response.
response = sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=production_variants,
    Tags=endpoint_config_tags,
)
pprint.pprint(response)

In [None]:
# Read the endpoint configuration back and print it.
response = sm.describe_endpoint_config(
    EndpointConfigName=endpoint_config_name,
)
pprint.pprint(response)

In [None]:
# A UTC timestamp suffix gives each run its own endpoint name.
ep_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
endpoint_name = 'xgboost-one-model-ep-' + ep_timestamp

In [None]:
# Tags attached to the endpoint: its own name, the algorithm and the environment.
endpoint_tags = [
    {'Key': 'Name', 'Value': endpoint_name},
    {'Key': 'Algorithm', 'Value': 'xgboost'},
    {'Key': 'Environment', 'Value': 'development'},
]

# Request creation of the endpoint from the endpoint configuration.
response = sm.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
    Tags=endpoint_tags,
)

In [None]:
# create_endpoint returns once the request is accepted, while the endpoint is
# still being provisioned. Block here until SageMaker reports the endpoint as
# InService, so that the later cells (auto scaling registration and
# invocation) do not run against an endpoint that is not ready yet.
# The waiter raises botocore.exceptions.WaiterError if the endpoint fails to
# reach InService or the wait times out.
sm.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)

# Print the endpoint description now that provisioning has finished.
response = sm.describe_endpoint(EndpointName=endpoint_name)

pprint.pprint(response)

In [None]:
# Application Auto Scaling client, used below to register the endpoint variant
# as a scalable target and to manage its scaling policy.
app = boto3.client(service_name='application-autoscaling')

In [None]:
# Identifies production variant 'variant-1' of the endpoint created above.
scalable_variant_id = 'endpoint/' + endpoint_name + '/variant/variant-1'

# Allow the variant's instance count to be scaled between 2 and 10.
response = app.register_scalable_target(
    ServiceNamespace='sagemaker',
    ScalableDimension='sagemaker:variant:DesiredInstanceCount',
    ResourceId=scalable_variant_id,
    MinCapacity=2,
    MaxCapacity=10,
)

In [None]:
policy_name = 'xgboost-scaling-policy'

# Target-tracking settings: hold the predefined per-instance invocation metric
# at 1000.0, with 60-second cooldowns for both scale-in and scale-out.
target_tracking_config = {
    'TargetValue': 1000.0,
    'PredefinedMetricSpecification': {
        'PredefinedMetricType': 'SageMakerVariantInvocationsPerInstance',
    },
    'ScaleInCooldown': 60,
    'ScaleOutCooldown': 60,
}

# Attach the policy to production variant 'variant-1' of the endpoint.
response = app.put_scaling_policy(
    PolicyName=policy_name,
    PolicyType='TargetTrackingScaling',
    ServiceNamespace='sagemaker',
    ScalableDimension='sagemaker:variant:DesiredInstanceCount',
    ResourceId='endpoint/' + endpoint_name + '/variant/variant-1',
    TargetTrackingScalingPolicyConfiguration=target_tracking_config,
)

In [None]:
# One CSV record (13 comma-separated values) sent as the request body below.
# NOTE(review): presumably a single feature row for the deployed XGBoost
# model - confirm against the training data.
test_sample = ','.join([
    '0.00632', '18.00', '2.310', '0', '0.5380', '6.5750', '65.20',
    '4.0900', '1', '296.0', '15.30', '396.90', '4.98',
])

In [None]:
smrt = boto3.Session().client(service_name='runtime.sagemaker')

# How long to keep sending traffic. The loop is bounded so that a
# "Restart & Run All" eventually reaches the cleanup cells instead of spinning
# forever. Increase this if the scaling policy has not reacted by the end.
LOAD_TEST_SECONDS = 600

load_deadline = time.monotonic() + LOAD_TEST_SECONDS
invocation_count = 0

try:
    while time.monotonic() < load_deadline:
        invocation = smrt.invoke_endpoint(EndpointName=endpoint_name,
                                          ContentType='text/csv',
                                          Body=test_sample)
        # Drain the streamed response so it is fully consumed before the
        # next request is sent.
        invocation['Body'].read()
        invocation_count += 1
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop the load early;
    # fall through so the summary below is still printed.
    pass

print('Sent {} invocations to {}'.format(invocation_count, endpoint_name))

In [None]:
# Production variant 'variant-1' of the endpoint, as registered for auto scaling.
scaled_variant_id = 'endpoint/'+endpoint_name+'/variant/variant-1'

# Tear down auto scaling in the reverse order of setup: remove the scaling
# policy first...
app.delete_scaling_policy(
    PolicyName=policy_name,
    ServiceNamespace='sagemaker',
    ScalableDimension='sagemaker:variant:DesiredInstanceCount',
    ResourceId=scaled_variant_id)

# ...then deregister the scalable target, so no auto scaling registration is
# left pointing at the endpoint that is about to be deleted.
app.deregister_scalable_target(
    ServiceNamespace='sagemaker',
    ScalableDimension='sagemaker:variant:DesiredInstanceCount',
    ResourceId=scaled_variant_id)

In [None]:
# Delete the endpoint created earlier in this notebook.
sm.delete_endpoint(
    EndpointName=endpoint_name,
)

In [None]:
# Delete the endpoint configuration created earlier in this notebook.
sm.delete_endpoint_config(
    EndpointConfigName=endpoint_config_name,
)