In [None]:
# Install Locust, the load-testing tool that the %%sh cells in this notebook run.
# NOTE(review): the version is not pinned, so the newest Locust release is installed.
!pip -q install locust

In [None]:
import sagemaker
import json
import boto3

# Execution role of the current SageMaker environment.
role = sagemaker.get_execution_role()
# One SageMaker session is shared by every cell below (region, bucket, boto clients).
sess = sagemaker.Session()
region = sess.boto_region_name
# Default S3 bucket of the session and the key prefix for this chapter.
bucket = sess.default_bucket()
prefix = 'sagemaker-studio-book/chapter07'

In [None]:
!mkdir load_testing

In [None]:
%%writefile load_testing/locustfile.py
from locust import task, between, events, User
import sagemaker
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
import json
import os, sys
import time
import numpy as np

# The target endpoint is supplied through the ENDPOINT_NAME environment variable
# (a KeyError is raised at import time if it is not set).
endpoint_name=os.environ['ENDPOINT_NAME']
# Predictor bound to the existing endpoint; requests and responses are JSON.
predictor = sagemaker.predictor.Predictor(endpoint_name, 
                    serializer=JSONSerializer(),
                    deserializer=JSONDeserializer())
print(predictor.endpoint_name)

# Location of the test CSV, relative to the directory Locust is started from.
csv_test_dir_prefix = 'imdb_data/test'
csv_test_filename = 'test.csv'

# Load only the first row of the test CSV as integers and make one inference
# call with it. This runs once per process when the locustfile is imported;
# the same sample (x_test) is reused for every request of the load test.
x_test = np.loadtxt(f'{csv_test_dir_prefix}/{csv_test_filename}', 
                    delimiter=',', dtype='int', max_rows=1)
out = predictor.predict(x_test)
print(out)

class SMLoadTestUser(User):
    """Locust user that repeatedly sends the sample `x_test` to the endpoint.

    Each simulated user waits between 0 and 1 second between tasks. Because
    the request goes through the SageMaker SDK instead of Locust's own HTTP
    client, the latency and outcome of every call are reported to Locust
    manually through the `request` event.
    """

    wait_time = between(0, 1)

    @task
    def test_endpoint(self):
        """Invoke the endpoint once and report latency and outcome to Locust."""
        exception = None
        # perf_counter is monotonic, so the measurement is immune to clock changes.
        start_time = time.perf_counter()
        try:
            predictor.predict(x_test)
        except Exception as e:
            # Catch Exception only: a bare `except` would also swallow the
            # GreenletExit/KeyboardInterrupt used to stop users and the run.
            exception = e
        total_time = int((time.perf_counter() - start_time) * 1000)

        # Single unified `request` event: the legacy request_success and
        # request_failure events are deprecated in Locust. A request counts
        # as failed when `exception` is not None.
        self.environment.events.request.fire(
            request_type="sagemaker",
            name="predict",
            response_time=total_time,
            response_length=0,
            exception=exception,
            context={},
        )

# Scenario 1: load test original instance configuration (one ml.c5.xlarge)

In [None]:
# Boto3 clients created from the SageMaker session's boto session.
# NOTE(review): sagemaker_client is not used by the cells visible here.
sagemaker_client = sess.boto_session.client('sagemaker')
autoscaling_client = sess.boto_session.client('application-autoscaling')

# Endpoint under test and its resource id for the 'AllTraffic' variant.
endpoint_name = 'imdb-tf-2021-09-21-17-37-20-2021-10-04-21-28-40-974'
resource_id = f'endpoint/{endpoint_name}/variant/AllTraffic' 
# MinCapacity == MaxCapacity == 1 keeps the variant's instance count fixed at
# one for this baseline scenario.
response = autoscaling_client.register_scalable_target(
   ServiceNamespace='sagemaker',
   ResourceId=resource_id,
   ScalableDimension='sagemaker:variant:DesiredInstanceCount',
   MinCapacity=1,
   MaxCapacity=1)

In [None]:
%%sh -s "$endpoint_name"
# The endpoint name comes from the notebook variable `endpoint_name`: IPython
# expands it on the magic line and `sh -s` exposes it to the script as $1, so
# the name is defined in one place only.
export ENDPOINT_NAME="$1"

# Two Locust workers run in the background and generate the load.
locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

# Master: headless run with up to 500 users, spawned at 10 users/s, for 60 s.
# It waits for both workers before starting and prints only the summary.
locust -f load_testing/locustfile.py --headless -u 500 -r 10 -t 60s \
       --print-stats --only-summary --loglevel ERROR \
       --autostart --autoquit 10 --master --expect-workers 2

# Scenario 2: load test scaled up configuration (one ml.c5.2xlarge)

In [None]:
import sagemaker
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
import json
import os, sys
import time
import numpy as np

In [None]:
from sagemaker.tensorflow import TensorFlow

# Name of the existing training job whose model is deployed in this scenario.
training_job_name='imdb-tf-2021-09-21-17-37-20'

# Re-attach to the training job, then deploy its model to a new endpoint
# backed by a single ml.c5.2xlarge instance. A new endpoint is created each
# time this cell runs, and it incurs cost until it is deleted.
estimator = TensorFlow.attach(training_job_name)
predictor_c5_2xl = estimator.deploy(initial_instance_count=1, 
                                      instance_type='ml.c5.2xlarge',
                                      wait=True)

In [None]:
# Show the name of the endpoint just deployed; the load test needs it as ENDPOINT_NAME.
predictor_c5_2xl.endpoint_name

In [None]:
%%sh -s "{predictor_c5_2xl.endpoint_name}"
# The endpoint name is taken from the predictor returned by the deploy cell
# instead of a literal copied from an earlier run: IPython expands the
# expression on the magic line and `sh -s` exposes it to the script as $1.
export ENDPOINT_NAME="$1"

# Two Locust workers run in the background and generate the load.
locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

# Master: headless run with up to 500 users, spawned at 10 users/s, for 60 s.
# It waits for both workers before starting and prints only the summary.
locust -f load_testing/locustfile.py --headless -u 500 -r 10 -t 60s \
       --print-stats --only-summary --loglevel ERROR \
       --autostart --autoquit 10 --master --expect-workers 2

# Scenario 3: load test GPU instance dedicated to ML inference (one ml.g4dn.xlarge)

In [None]:
# Deploy the same estimator's model to a new endpoint backed by a single
# ml.g4dn.xlarge instance. Relies on `estimator` from the Scenario 2 cell.
predictor_g4dn_xl = estimator.deploy(initial_instance_count=1, 
                                      instance_type='ml.g4dn.xlarge',
                                      wait=True)

In [None]:
# Show the name of the endpoint just deployed; the load test needs it as ENDPOINT_NAME.
predictor_g4dn_xl.endpoint_name

In [None]:
%%sh -s "{predictor_g4dn_xl.endpoint_name}"
# The endpoint name is taken from the predictor returned by the deploy cell
# instead of a literal copied from an earlier run: IPython expands the
# expression on the magic line and `sh -s` exposes it to the script as $1.
export ENDPOINT_NAME="$1"

# Two Locust workers run in the background and generate the load.
locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

# Master: headless run with up to 500 users, spawned at 10 users/s, for 60 s.
# It waits for both workers before starting and prints only the summary.
locust -f load_testing/locustfile.py --headless -u 500 -r 10 -t 60s \
       --print-stats --only-summary --loglevel ERROR \
       --autostart --autoquit 10 --master --expect-workers 2

# Scenario 4: load test autoscaling (1-4 ml.c5.xlarge instances)

In [None]:
# Scenario 4 targets the same endpoint name as Scenario 1.
endpoint_name = 'imdb-tf-2021-09-21-17-37-20-2021-10-04-21-28-40-974'
resource_id=f'endpoint/{endpoint_name}/variant/AllTraffic' # This is the format in which application autoscaling references the endpoint

# Widen the allowed instance count of the variant to the range 1-4.
# NOTE(review): this call only sets the capacity bounds. No scaling policy is
# created in the cells visible here, so one is presumably already attached to
# this endpoint - confirm with the listing in the next cell.
response = autoscaling_client.register_scalable_target(
    ServiceNamespace='sagemaker',
    ResourceId=resource_id,
    ScalableDimension='sagemaker:variant:DesiredInstanceCount',
    MinCapacity=1,
    MaxCapacity=4)

In [None]:
# List the scaling policies registered in the 'sagemaker' namespace and print
# each policy's name followed by its configuration.
response = autoscaling_client.describe_scaling_policies(ServiceNamespace='sagemaker')

for policy in response['ScalingPolicies']:
    # Blank line, policy name, blank line.
    print('', policy['PolicyName'], '', sep='\n')
    # A policy carries a target-tracking configuration or, failing that, a
    # step-scaling one.
    config_key = (
        'TargetTrackingScalingPolicyConfiguration'
        if 'TargetTrackingScalingPolicyConfiguration' in policy
        else 'StepScalingPolicyConfiguration'
    )
    print(policy[config_key])
    print('')

In [None]:
%%sh -s "$endpoint_name"
# The endpoint name comes from the notebook variable `endpoint_name`: IPython
# expands it on the magic line and `sh -s` exposes it to the script as $1, so
# the name is defined in one place only.
export ENDPOINT_NAME="$1"

# Two Locust workers run in the background and generate the load.
locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

locust -f load_testing/locustfile.py --worker --loglevel ERROR --autostart --autoquit 10 &

# Master: headless run with up to 500 users, spawned at 10 users/s, for 600 s
# (ten times longer than the other scenarios). It waits for both workers
# before starting and prints only the summary.
locust -f load_testing/locustfile.py --headless -u 500 -r 10 -t 600s \
       --print-stats --only-summary --loglevel ERROR \
       --autostart --autoquit 10 --master --expect-workers 2

Uncomment and run the next cell to delete the endpoints and stop incurring costs.

In [None]:
# predictor_g4dn_xl.delete_endpoint()
# predictor_c5_2xl.delete_endpoint()
# NOTE(review): the next line repeats the g4dn deletion from the first line.
# The ml.c5.xlarge endpoint used in Scenarios 1 and 4 is not deleted by this
# cell - confirm whether it should be removed here as well.
# predictor_g4dn_xl.delete_endpoint()