## XGBoost-MME Example

In [3]:
import boto3
import sagemaker
from sagemaker.estimator import Estimator

boto_session = boto3.session.Session()
region = boto_session.region_name

#sagemaker_session = sagemaker.Session()
base_job_prefix = 'xgboost-example'
#role = sagemaker.get_execution_role()

#default_bucket = sagemaker_session.default_bucket()
s3_prefix = base_job_prefix

training_instance_type = 'ml.m5.xlarge'

In [5]:
#sagemaker_session = sagemaker.Session()

In [2]:
!aws s3 cp s3://sagemaker-sample-files/datasets/tabular/uci_abalone/train_csv/abalone_dataset1_train.csv .

download: s3://sagemaker-sample-files/datasets/tabular/uci_abalone/train_csv/abalone_dataset1_train.csv to ./abalone_dataset1_train.csv


In [3]:
!aws s3 cp abalone_dataset1_train.csv s3://{default_bucket}/xgboost-regression/train.csv

upload: ./abalone_dataset1_train.csv to s3://sagemaker-us-east-1-474422712127/xgboost-regression/train.csv


In [4]:
from sagemaker.inputs import TrainingInput
training_path = f's3://{default_bucket}/xgboost-regression/train.csv'
train_input = TrainingInput(training_path, content_type="text/csv")

In [5]:
model_path = f's3://{default_bucket}/{s3_prefix}/xgb_model'

image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type=training_instance_type,
)

xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=1,
    output_path=model_path,
    sagemaker_session=sagemaker_session,
    role=role
)

xgb_train.set_hyperparameters(
    objective="reg:linear",
    num_round=50,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
    silent=0,
)

In [6]:
xgb_train.fit({'train': train_input})

2023-01-25 16:08:10 Starting - Starting the training job...
2023-01-25 16:08:36 Starting - Preparing the instances for trainingProfilerReport-1674662890: InProgress
......
2023-01-25 16:09:36 Downloading - Downloading input data......
2023-01-25 16:10:40 Training - Training image download completed. Training in progress.
2023-01-25 16:10:40 Uploading - Uploading generated training model[34m[2023-01-25 16:10:31.924 ip-10-0-191-237.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:linear to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined 

In [7]:
model_artifacts = xgb_train.model_data
model_artifacts

's3://sagemaker-us-east-1-474422712127/xgboost-example/xgb_model/sagemaker-xgboost-2023-01-25-16-08-10-225/output/model.tar.gz'

In [8]:
!aws s3 cp {model_artifacts} model.tar.gz

download: s3://sagemaker-us-east-1-474422712127/xgboost-example/xgb_model/sagemaker-xgboost-2023-01-25-16-08-10-225/output/model.tar.gz to ./model.tar.gz


In [9]:
#Bucket for model artifacts
default_bucket = sagemaker_session.default_bucket()
print(default_bucket)

sagemaker-us-east-1-474422712127


In [10]:
%%sh

s3_bucket='sagemaker-us-east-1-474422712127'

for i in {0..1}
do
  aws s3 cp model.tar.gz s3://$s3_bucket/mme-xgboost/xgboost-$i.tar.gz 
done

upload: ./model.tar.gz to s3://sagemaker-us-east-1-474422712127/mme-xgboost/xgboost-0.tar.gz
upload: ./model.tar.gz to s3://sagemaker-us-east-1-474422712127/mme-xgboost/xgboost-1.tar.gz


In [11]:
!aws s3 ls s3://sagemaker-us-east-1-474422712127/mme-xgboost/

2023-01-25 16:13:00      32665 xgboost-0.tar.gz
2023-01-25 16:13:00      32665 xgboost-1.tar.gz


In [6]:
model_url = 's3://sagemaker-us-east-1-474422712127/mme-xgboost/'

In [7]:
!aws s3 ls {model_url}

2023-01-25 16:13:00      32665 xgboost-0.tar.gz
2023-01-25 16:13:00      32665 xgboost-1.tar.gz


In [8]:
client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")

In [10]:
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type=training_instance_type,
)
image_uri

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'

In [13]:
from time import gmtime, strftime
model_name = 'mme-source' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

print('Model name: ' + model_name)
print('Model data Url: ' + model_url)

create_model_response = client.create_model(
    ModelName=model_name,
    Containers=[
        {
            "Image": image_uri,
            "Mode": "MultiModel",
            "ModelDataUrl": model_url
        }
    ],
    ExecutionRoleArn='arn:aws:iam::474422712127:role/sagemaker-role-BYOC',
)
print("Model Arn: " + create_model_response["ModelArn"])

Model name: mme-source2023-02-08-20-19-08
Model data Url: s3://sagemaker-us-east-1-474422712127/mme-xgboost/
Model Arn: arn:aws:sagemaker:us-east-1:474422712127:model/mme-source2023-02-08-20-19-08


In [14]:
#Step 2: EPC Creation
xgboost_epc_name = "mme-source" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=xgboost_epc_name,
    ProductionVariants=[
        {
            "VariantName": "xgboostvariant",
            "ModelName": model_name,
            "InstanceType": "ml.m5.xlarge",
            "InitialInstanceCount": 1,
            #"Environment": {} 
        },
    ],
)
print("Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"])

Endpoint Configuration Arn: arn:aws:sagemaker:us-east-1:474422712127:endpoint-config/mme-source2023-02-08-20-19-12


In [15]:
#Step 3: EP Creation
endpoint_name = "mme-source" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
create_endpoint_response = client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=xgboost_epc_name,
)
print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

Endpoint Arn: arn:aws:sagemaker:us-east-1:474422712127:endpoint/mme-source2023-02-08-20-19-15


In [16]:
#Monitor creation
from time import gmtime, strftime
import time
describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
while describe_endpoint_response["EndpointStatus"] == "Creating":
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])
    time.sleep(15)
print(describe_endpoint_response)

Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
InService
{'EndpointName': 'mme-source2023-02-08-20-19-15', 'EndpointArn': 'arn:aws:sagemaker:us-east-1:474422712127:endpoint/mme-source2023-02-08-20-19-15', 'EndpointConfigName': 'mme-source2023-02-08-20-19-12', 'ProductionVariants': [{'VariantName': 'xgboostvariant', 'DeployedImages': [{'SpecifiedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3', 'ResolvedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost@sha256:b5a61741d3d65872d4b5e6090c200ecf8d8c1ca4ea716261f573dcbc49a46485', 'ResolutionTime': datetime.datetime(2023, 2, 8, 20, 19, 15, 998000, tzinfo=tzlocal())}], 'CurrentWeight': 1.0, 'DesiredWeight': 1.0, 'CurrentInstanceCount': 1, 'DesiredInstanceCount': 1}], 'EndpointStatus': 'InService', 'CreationTime': datetime.datetime(2023, 2, 8, 20, 19, 15, 356000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2023, 2, 8, 20, 21, 25, 82000, tz

In [17]:
endpoint_name = 'mme-source2023-02-08-20-19-15'

In [21]:
import boto3

resp = runtime.invoke_endpoint(EndpointName=endpoint_name, Body=b'.345,0.224414,.131102,0.042329,.279923,-0.110329,-0.099358,0.0', 
                           ContentType='text/csv', TargetModel = "xgboost-1.tar.gz")

print(resp['Body'].read())

b'[4.566554546356201]'


In [26]:
payload = b'.345,0.224414,.131102,0.042329,.279923,-0.110329,-0.099358,0.0'

In [29]:
import numpy as np 
import datetime
import math
import time
import matplotlib.pyplot as plt
import random

total_runs = 500
max_models = 1
content_type = "text/csv" 
print('Running {} inferences for {} (max models: {}):'.format(total_runs, endpoint_name, max_models))
client_times = []
errors_list = []
cw_start = datetime.datetime.utcnow()
errors = 0

for _ in range(total_runs):
    target_model = f'xgboost-{random.randint(0,max_models)}.tar.gz'
    client_start = time.time()
    response = runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType=content_type,
        TargetModel=target_model,
        Body=payload)
    client_end = time.time()
    client_times.append((client_end - client_start)*1000)
    skunk = response['Body'].read()  

cw_end = datetime.datetime.utcnow()    

cw_duration = cw_end - cw_start 
duration_in_s = cw_duration.total_seconds() 

tps = total_runs/duration_in_s

print('\nErrors - {:.4f} out of {:.4f} total runs | {:.4f}% in {:.4f} seconds \n'.format(errors, total_runs, (errors/total_runs)*100, duration_in_s))
errors = 0

print('\nTPS: {:.4f}'.format(tps))
    
print('Client end-to-end latency percentiles:')
client_avg = np.mean(client_times)
client_p50 = np.percentile(client_times, 50)
client_p90 = np.percentile(client_times, 90)
client_p95 = np.percentile(client_times, 95)
client_p100 = np.percentile(client_times, 100)
print('Avg | P50 | P90 | P95 | P100')
print('{:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} \n'.format(client_avg, client_p50, client_p90, client_p95, client_p100))

# Give 5 minute buffer to end
cw_end += datetime.timedelta(minutes=5)

Running 500 inferences for mme-source2023-02-08-20-19-15 (max models: 1):

Errors - 0.0000 out of 500.0000 total runs | 0.0000% in 3.8919 seconds 


TPS: 128.4733
Client end-to-end latency percentiles:
Avg | P50 | P90 | P95 | P100
7.7063 | 7.5651 | 8.6972 | 9.1191 | 13.4287 

