In [2]:
!pip install locust

Collecting locust
  Downloading locust-1.4.3-py3-none-any.whl (764 kB)
[K     |████████████████████████████████| 764 kB 17.1 MB/s eta 0:00:01
Collecting Flask-BasicAuth>=0.2.0
  Downloading Flask-BasicAuth-0.2.0.tar.gz (16 kB)
Collecting ConfigArgParse>=1.0
  Downloading ConfigArgParse-1.4.tar.gz (45 kB)
[K     |████████████████████████████████| 45 kB 5.6 MB/s  eta 0:00:01
[?25hCollecting geventhttpclient>=1.4.4
  Downloading geventhttpclient-1.4.4-cp36-cp36m-manylinux2010_x86_64.whl (76 kB)
[K     |████████████████████████████████| 76 kB 8.7 MB/s  eta 0:00:01
Building wheels for collected packages: ConfigArgParse, Flask-BasicAuth
  Building wheel for ConfigArgParse (setup.py) ... [?25ldone
[?25h  Created wheel for ConfigArgParse: filename=ConfigArgParse-1.4-py3-none-any.whl size=19639 sha256=25c1a0a2b286d0a8dc1178cec4103e4f8c47e87917cb1079316bcd4a81ae7aa5
  Stored in directory: /home/ec2-user/.cache/pip/wheels/d7/58/75/55e0ab6ba4dccaa280df70550a38a98db42a9e66555b056c09
  Buildin

In [52]:
# Print the path of the locust executable that the shell resolves on PATH.
!which locust

/home/ec2-user/anaconda3/envs/python3/bin/locust


In [78]:
import boto3
from botocore.config import Config
import pandas as pd
import itertools
import datetime

# --- Endpoint under test ----------------------------------------------------
region = 'us-east-1'
content_type = 'text/csv'
endpoint_name = 'sagemaker-decision-trees-2021-03-17-03-32-20-604'

# Client configuration: 'standard' retry mode, up to 100 attempts per call.
boto3config = Config(retries=dict(max_attempts=100, mode='standard'))

# One newline-terminated CSV record with four numeric fields.
payload = '5.0,3.5,1.3,0.3\n'

sagemaker_client = boto3.client(
    'sagemaker-runtime',
    region_name=region,
    config=boto3config,
)

# Smoke test: send a single request and read the raw response bytes.
response = sagemaker_client.invoke_endpoint(
    ContentType=content_type,
    Body=payload,
    EndpointName=endpoint_name,
)
response_body = response["Body"].read()

# Last expression of the cell, so the notebook displays it.
response_body

b'setosa\n'

In [79]:
cw_start = datetime.datetime.utcnow()
!/home/ec2-user/anaconda3/envs/python3/bin/locust -f locust_script.py -u 50 --headless --host=http://sagemaker-decision-trees-2021-03-17-03-32-20-604 --stop-timeout 90 -L DEBUG -t 5m --logfile=logfile.log --csv=locust.csv --csv-full-history --reset-stats              
cw_end = datetime.datetime.utcnow()        

sagemaker-decision-trees-2021-03-17-03-32-20-604
 Name                                                          # reqs      # fails  |     Avg     Min     Max  Median  |   req/s failures/s
--------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------
 Aggregated                                                         0     0(0.00%)  |       0       0       0       0  |    0.00    0.00

sagemaker-decision-trees-2021-03-17-03-32-20-604
 Name                                                          # reqs      # fails  |     Avg     Min     Max  Median  |   req/s failures/s
--------------------------------------------------------------------------------------------------------------------------------------------
 custom_protocol_boto3 sagemaker_client_invoke_endpoint      

In [80]:
# Load the aggregate statistics written by the locust run and print the first
# two rows in full (one field per line).
locust_data = pd.read_csv('locust.csv_stats.csv')
for index, row in locust_data.iloc[:2].iterrows():
    print(index, row)

0 Type                                custom_protocol_boto3
Name                     sagemaker_client_invoke_endpoint
Request Count                                       92959
Failure Count                                           0
Median Response Time                                  120
Average Response Time                             118.536
Min Response Time                                      57
Max Response Time                                   10769
Average Content Size                                    7
Requests/s                                        371.618
Failures/s                                              0
50%                                                   120
66%                                                   130
75%                                                   130
80%                                                   130
90%                                                   140
95%                                                   150
98%         

In [97]:
import math
import time

# Basic CloudWatch statistics requested for every metric.
statistics = ['Sum', 'SampleCount', 'Average', 'Minimum', 'Maximum']
# Percentile statistics requested alongside the basic ones.
extended = ['p50', 'p90', 'p95', 'p99', 'p100']

# Metric names passed to collect_cloudwatch_metrics after the load test.
metrics_to_gather = [
    'CPUUtilization',
    'MemoryUtilization',
    'GPUUtilization',
    'GPUMemoryUtilization',
    'DiskUtilization',
    'ModelLatency',
    'OverheadLatency',
    'Invocations',
    'Invocation4XXErrors',
    'Invocation5XXErrors',
    'InvocationsPerInstance',
    # Currently disabled:
    # 'ModelLoadingTime',
    # 'ModelCacheHit',
    # 'ModelDownloadingTime',
    # 'ModelLoadingWaitTime',
    # 'ModelUnloadingTime',
    # 'LoadedModelCount',
]

def get_sample_count(cw_end, cw_start, poll_interval=90, max_attempts=None):
    """Return how many invocations CloudWatch recorded for the endpoint in a window.

    Polls the ``Invocations`` metric (namespace ``AWS/SageMaker``, variant
    ``AllTraffic``) of the module-level ``endpoint_name`` until CloudWatch
    returns at least one datapoint, then returns the total ``SampleCount``.

    Args:
        cw_end: datetime marking the end of the measurement window.
        cw_start: datetime marking the start of the measurement window.
        poll_interval: Seconds to sleep before each poll. Defaults to 90.
        max_attempts: Maximum number of polls before giving up. ``None``
            (the default) keeps polling until datapoints appear.

    Returns:
        The sum of ``SampleCount`` over every datapoint returned for the window.

    Raises:
        TimeoutError: if ``max_attempts`` polls all returned no datapoints.
    """
    cloudwatch = boto3.client('cloudwatch')

    # Period must be 1, 5, 10, 30, or a multiple of 60: round the elapsed time
    # up to the next whole minute.
    elapsed_seconds = (cw_end - cw_start).total_seconds()
    period = math.ceil(elapsed_seconds / 60) * 60

    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        # Sleep first: metrics are not available immediately after the run.
        time.sleep(poll_interval)

        response = cloudwatch.get_metric_statistics(
            MetricName='Invocations',
            Dimensions=[{'Name': 'EndpointName', 'Value': endpoint_name},
                        {'Name': 'VariantName', 'Value': "AllTraffic"}],
            Namespace="AWS/SageMaker",
            StartTime=cw_start,
            EndTime=cw_end,
            Period=period,
            Statistics=['SampleCount'],
        )

        datapoints = response['Datapoints']
        if datapoints:
            # CloudWatch can return more than one datapoint for the window
            # (e.g. when it straddles a period boundary); reading only the
            # first one would undercount, so add them all up.
            return sum(point['SampleCount'] for point in datapoints)

    raise TimeoutError(
        'No CloudWatch datapoints for Invocations after {} attempt(s)'.format(attempts)
    )

def collect_cloudwatch_metrics(statistics, extended, metrics_to_gather, total_runs, cw_end, cw_start,
                               poll_interval=90, max_attempts=None):
    """Fetch endpoint metrics from CloudWatch for a time window and print them.

    For every name in ``metrics_to_gather`` the function queries CloudWatch for
    the module-level ``endpoint_name`` (variant ``AllTraffic``) and, when a
    datapoint is returned, prints two tables: the average and percentiles
    divided by ``total_runs``, and the raw Sum / SampleCount / Average /
    Minimum / Maximum. Metrics with no datapoints are skipped silently. Polling
    repeats until at least one metric has returned data.

    Args:
        statistics: Basic statistics to request. The function reads 'Sum',
            'SampleCount', 'Average', 'Minimum' and 'Maximum' from the result.
        extended: Percentile statistics to request. The function reads 'p50',
            'p90', 'p95', 'p99' and 'p100' from the result.
        metrics_to_gather: Iterable of CloudWatch metric names.
        total_runs: Divisor applied to the average and percentile values.
        cw_end: datetime marking the end of the measurement window.
        cw_start: datetime marking the start of the measurement window.
        poll_interval: Seconds to sleep before each polling round. Defaults to 90.
        max_attempts: Maximum number of polling rounds. ``None`` (the default)
            keeps polling until some metric returns data.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if ``total_runs`` is zero or ``None``.
        TimeoutError: if ``max_attempts`` rounds all returned no datapoints.
    """
    # Fail fast rather than hitting a ZeroDivisionError after the first wait.
    if not total_runs:
        raise ValueError('total_runs must be a non-zero number, got {!r}'.format(total_runs))

    print('Getting Cloudwatch:')
    cloudwatch = boto3.client('cloudwatch')

    # Period must be 1, 5, 10, 30, or a multiple of 60: round the elapsed time
    # up to the next whole minute.
    elapsed_seconds = (cw_end - cw_start).total_seconds()
    period = math.ceil(elapsed_seconds / 60) * 60
    print('Time elapsed: {} seconds'.format(elapsed_seconds))
    print('Using period of {} seconds\n'.format(period))

    attempts = 0
    cloudwatch_ready = False

    # Keep polling CloudWatch metrics until datapoints are available
    while not cloudwatch_ready:
        if max_attempts is not None and attempts >= max_attempts:
            raise TimeoutError(
                'No CloudWatch datapoints after {} attempt(s)'.format(attempts)
            )
        attempts += 1

        # Announce the real wait time, and do so before sleeping.
        print('Waiting {} seconds ...'.format(poll_interval))
        time.sleep(poll_interval)

        for metric in metrics_to_gather:
            # Utilization metrics are published under a different namespace
            # than the invocation and latency metrics.
            if 'Util' in metric:
                namespace = "/aws/sagemaker/Endpoints"
            else:
                namespace = "AWS/SageMaker"

            response = cloudwatch.get_metric_statistics(
                MetricName=metric,
                Dimensions=[{'Name': 'EndpointName', 'Value': endpoint_name},
                            {'Name': 'VariantName', 'Value': "AllTraffic"}],
                Namespace=namespace,
                StartTime=cw_start,
                EndTime=cw_end,
                Period=period,
                Statistics=statistics,
                ExtendedStatistics=extended,
            )

            datapoints = response['Datapoints']
            if not datapoints:
                continue

            # Only the first datapoint is reported; percentiles cannot be
            # merged across datapoints.
            point = datapoints[0]
            percentiles = point['ExtendedStatistics']

            print(metric + '\n')

            # NOTE(review): every metric, including utilization and latency, is
            # divided by total_runs here - confirm this normalisation is intended.
            side_avg = point['Average'] / total_runs
            side_p50 = percentiles['p50'] / total_runs
            side_p90 = percentiles['p90'] / total_runs
            side_p95 = percentiles['p95'] / total_runs
            side_p99 = percentiles['p99'] / total_runs
            side_p100 = percentiles['p100'] / total_runs

            # Six columns and six placeholders, so P99 is labelled correctly
            # and P100 is no longer dropped.
            print('Avg | P50 | P90 | P95 | P99 | P100')
            print('{:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f}\n'.format(
                side_avg, side_p50, side_p90, side_p95, side_p99, side_p100))
            print('Sum | SampleCount | Average | Minimum | Maximum')
            print('{:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f}\n'.format(
                point['Sum'], point['SampleCount'], point['Average'],
                point['Minimum'], point['Maximum']))

            cloudwatch_ready = True


In [None]:
# Ask CloudWatch how many invocations it recorded for the load-test window,
# then print every metric for the same window. Keyword arguments keep the
# (cw_end, cw_start) pair from being swapped by accident.
total_runs = get_sample_count(cw_end=cw_end, cw_start=cw_start)
collect_cloudwatch_metrics(
    statistics=statistics,
    extended=extended,
    metrics_to_gather=metrics_to_gather,
    total_runs=total_runs,
    cw_end=cw_end,
    cw_start=cw_start,
)