In [25]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [26]:
import os
import io
from datetime import datetime
import logging

<IPython.core.display.Javascript object>

In [27]:
import boto3
import sagemaker
from sagemaker.session import TrainingInput
from sagemaker import image_uris
from sagemaker import hyperparameters

<IPython.core.display.Javascript object>

In [28]:
boto3.set_stream_logger(name="botocore.credentials", level=logging.WARNING)

<IPython.core.display.Javascript object>

In [29]:
region = sagemaker.Session().boto_region_name
print(region)

ap-southeast-2


<IPython.core.display.Javascript object>

In [30]:
# role_arn = sagemaker.get_execution_role()
role_arn = os.getenv("SGMKR_ROLE_ARN")

<IPython.core.display.Javascript object>

In [31]:
bucket = "sgmkr-course"
prefix = "iris"

<IPython.core.display.Javascript object>

In [32]:
!aws s3 ls {bucket}/{prefix}/

                           PRE batch_transform/
                           PRE data/
                           PRE model/
2023-02-03 13:15:44          0 


<IPython.core.display.Javascript object>

In [33]:
!aws s3 ls {bucket}/{prefix}/data/ --recursive

2023-02-03 13:16:32          0 iris/data/
2023-03-16 18:09:28        900 iris/data/iris_test.csv
2023-03-16 18:09:27       1800 iris/data/iris_train.csv


<IPython.core.display.Javascript object>

In [34]:
train_file = "data/iris_train.csv"
valid_file = "data/iris_test.csv"

<IPython.core.display.Javascript object>

In [35]:
train_ip = TrainingInput(
    "s3://{}/{}/{}".format(bucket, prefix, train_file), content_type="csv"
)
valid_ip = TrainingInput(
    "s3://{}/{}/{}".format(bucket, prefix, valid_file), content_type="csv"
)

<IPython.core.display.Javascript object>

In [36]:
model_op = "s3://{}/{}/{}".format(bucket, prefix, "model")

<IPython.core.display.Javascript object>

In [37]:
train_image_uri = sagemaker.image_uris.retrieve("xgboost", region, "latest")
print(train_image_uri)

Ignoring unnecessary instance type: None.


544295431143.dkr.ecr.ap-southeast-2.amazonaws.com/xgboost:latest


<IPython.core.display.Javascript object>

In [38]:
base_job_name = "iris-xgboost"

<IPython.core.display.Javascript object>

In [41]:
xgb_estimator = sagemaker.estimator.Estimator(
    image_uri=train_image_uri,
    role=role_arn,
    base_job_name=base_job_name,
    instance_count=1,
    instance_type="ml.m4.xlarge",
    volume_size=5,
    output_path=model_op,
    sagemaker_session=sagemaker.Session(),
)

<IPython.core.display.Javascript object>

In [42]:
xgb_estimator.set_hyperparameters(
    num_class=3, max_depth=5, num_round=10, objective="multi:softmax",
)

<IPython.core.display.Javascript object>

In [43]:
# xgb_estimator.set_hyperparameters(
#     num_class=3,
#     max_depth=5,
#     eta=0.2,
#     gamma=4,
#     min_child_weight=6,
#     subsample=0.7,
#     objective="multi:softmax",
#     num_round=10,
# )

<IPython.core.display.Javascript object>

In [47]:
job_name = "iris-xgboost-" + datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
print(job_name)

iris-xgboost-2023-03-22-19-04-50


<IPython.core.display.Javascript object>

In [48]:
xgb_estimator.fit(
    {"train": train_ip, "validation": valid_ip}, wait=True, job_name=job_name
)

Creating training-job with name: iris-xgboost-2023-03-22-19-04-50


2023-03-22 08:04:51 Starting - Starting the training job...
2023-03-22 08:05:16 Starting - Preparing the instances for training......
2023-03-22 08:06:23 Downloading - Downloading input data...
2023-03-22 08:06:53 Training - Downloading the training image...
2023-03-22 08:07:24 Training - Training image download completed. Training in progress...[34mArguments: train[0m
[34m[2023-03-22:08:07:38:INFO] Running standalone xgboost training.[0m
[34m[2023-03-22:08:07:38:INFO] File size need to be processed in the node: 0.0mb. Available memory size in the node: 8597.44mb[0m
[34m[2023-03-22:08:07:38:INFO] Determined delimiter of CSV input is ','[0m
[34m[08:07:38] S3DistributionType set as FullyReplicated[0m
[34m[08:07:38] 100x4 matrix with 400 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2023-03-22:08:07:38:INFO] Determined delimiter of CSV input is ','[0m
[34m[08:07:38] S3DistributionType set as FullyReplicated[0m
[34m[08:07:38] 50

<IPython.core.display.Javascript object>

In [72]:
!aws s3 ls {bucket}/{prefix}/model/

                           PRE iris-xgboost-2023-03-22-18-12-49/
                           PRE iris-xgboost-2023-03-22-18-58-53/
                           PRE iris-xgboost-2023-03-22-19-04-50/
                           PRE iris-xgboost2023-03-22-18-09-35/


<IPython.core.display.Javascript object>

### Inference 

In [50]:
from sagemaker.serializers import CSVSerializer

<IPython.core.display.Javascript object>

#### Deploy the model as an endpoint

In [51]:
type(xgb_estimator)

sagemaker.estimator.Estimator

<IPython.core.display.Javascript object>

In [52]:
xgb_predictor = xgb_estimator.deploy(
    initial_instance_count=1, instance_type="ml.t2.medium", serializer=CSVSerializer()
)

Creating model with name: iris-xgboost-2023-03-22-08-09-03-677
Creating endpoint-config with name iris-xgboost-2023-03-22-08-09-03-677
Creating endpoint with name iris-xgboost-2023-03-22-08-09-03-677


------------!

<IPython.core.display.Javascript object>

#### Predictor single record

In [53]:
xgb_predictor.predict("7.7, 3.0, 6.1, 2.3")

b'2.0'

<IPython.core.display.Javascript object>

#### Endpoint

In [54]:
endpoint_name = xgb_predictor.endpoint_name
print(endpoint_name)

iris-xgboost-2023-03-22-08-09-03-677


<IPython.core.display.Javascript object>

In [55]:
sgmkr_runtime = boto3.client("runtime.sagemaker")

<IPython.core.display.Javascript object>

#### Endpoint - One record

In [56]:
payload_csv_text = "7.7, 3.0, 6.1, 2.3"
response = sgmkr_runtime.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=payload_csv_text
)
print(response)

{'ResponseMetadata': {'RequestId': '7a8b2061-5659-4e0a-b335-dceab7d92266', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '7a8b2061-5659-4e0a-b335-dceab7d92266', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Wed, 22 Mar 2023 08:15:38 GMT', 'content-type': 'text/csv; charset=utf-8', 'content-length': '3'}, 'RetryAttempts': 0}, 'ContentType': 'text/csv; charset=utf-8', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7fd8c818fa90>}


<IPython.core.display.Javascript object>

In [57]:
print(response["Body"].read().decode())

2.0


<IPython.core.display.Javascript object>

#### Endpoint - Multiple records

In [58]:
payload_csv_text = "7.7, 3.0, 6.1, 2.3 \n 7.9, 3.8, 6.4, 2.1"

response = sgmkr_runtime.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=payload_csv_text
)
print(response["Body"].read().decode())

2.0,2.0


<IPython.core.display.Javascript object>

#### Endpoint - Multiple records from a local file

In [59]:
csv_buffer = open("data/iris_infer.csv")
payload_csv_text = csv_buffer.read()

response = sgmkr_runtime.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=payload_csv_text
)
print(response["Body"].read().decode())

2.0,1.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,2.0,2.0,2.0,0.0,1.0,0.0,0.0,2.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,2.0,1.0,1.0,2.0


<IPython.core.display.Javascript object>

In [77]:
payload_csv_text

'6.3,3.3,4.7,1.6\n5.0,3.4,1.5,0.2\n5.8,2.7,4.1,1.0\n7.3,2.9,6.3,1.8\n4.9,2.4,3.3,1.0\n5.7,2.8,4.5,1.3\n5.7,3.8,1.7,0.3\n5.6,3.0,4.5,1.5\n5.5,2.3,4.0,1.3\n4.4,3.2,1.3,0.2\n5.8,4.0,1.2,0.2\n5.1,3.3,1.7,0.5\n5.1,3.4,1.5,0.2\n5.4,3.7,1.5,0.2\n6.4,2.8,5.6,2.2\n6.0,3.0,4.8,1.8\n5.6,2.5,3.9,1.1\n7.7,2.8,6.7,2.0\n5.7,2.8,4.1,1.3\n6.5,3.0,5.2,2.0\n5.6,3.0,4.1,1.3\n4.7,3.2,1.3,0.2\n6.5,3.0,5.5,1.8\n4.6,3.6,1.0,0.2\n6.5,3.0,5.8,2.2\n6.7,3.1,5.6,2.4\n5.0,3.2,1.2,0.2\n5.4,3.4,1.7,0.2\n6.2,3.4,5.4,2.3\n6.4,2.7,5.3,1.9\n6.9,3.1,5.1,2.3\n5.1,3.7,1.5,0.4\n5.4,3.0,4.5,1.5\n5.2,3.4,1.4,0.2\n4.5,2.3,1.3,0.3\n6.7,3.0,5.2,2.3\n5.7,2.9,4.2,1.3\n6.7,3.0,5.0,1.7\n6.0,3.4,4.5,1.6\n6.1,2.9,4.7,1.4\n5.0,2.3,3.3,1.0\n4.4,3.0,1.3,0.2\n4.9,3.0,1.4,0.2\n6.1,2.6,5.6,1.4\n6.0,2.9,4.5,1.5\n6.7,2.5,5.8,1.8\n4.9,2.5,4.5,1.7\n6.4,3.2,4.5,1.5\n6.1,3.0,4.9,1.8\n'

<IPython.core.display.Javascript object>

#### Endpoint - Multiple records from a S3 file

In [60]:
infer_ip_s3_uri = "s3://{}/{}/{}".format(
    bucket, prefix, "batch_transform/iris_infer.csv"
)

# payload_df = pd.read_csv(infer_ip_s3_uri)
# payload_df = wr.s3.read_csv(path=infer_ip_s3_uri)
s3_clnt = boto3.client("s3")
obj = s3_clnt.get_object(Bucket=bucket, Key="iris/batch_transform/iris_infer.csv")
payload_df = pd.read_csv(obj["Body"])

csv_buffer = io.StringIO()
payload_df.to_csv(csv_buffer, header=None, index=None)
payload_csv_text = payload_file.getvalue()

response = sgmkr_runtime.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=payload_csv_text
)
print(response["Body"].read().decode())

1.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,2.0,2.0,2.0,0.0,1.0,0.0,0.0,2.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,2.0,1.0,1.0,2.0


<IPython.core.display.Javascript object>

#### Delete the endpoint

In [62]:
sgmkr_clnt = boto3.client("sagemaker")

<IPython.core.display.Javascript object>

In [63]:
sgmkr_clnt.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '4a34bbbc-0aea-4d4e-8d5d-1e4657e626fd',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4a34bbbc-0aea-4d4e-8d5d-1e4657e626fd',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 22 Mar 2023 08:16:50 GMT'},
  'RetryAttempts': 0}}

<IPython.core.display.Javascript object>

#### Batch Transform

In [64]:
batch_ip = "s3://{}/{}/{}".format(bucket, prefix, "batch_transform")
batch_op = "s3://{}/{}/{}".format(bucket, prefix, "batch_transform")

<IPython.core.display.Javascript object>

In [65]:
!aws s3 ls {batch_ip}/ --recursive

2023-02-03 13:29:08          0 iris/batch_transform/
2023-03-16 18:09:29        800 iris/batch_transform/iris_infer.csv


<IPython.core.display.Javascript object>

In [67]:
transformer = xgb_estimator.transformer(
    instance_count=1, instance_type="ml.m4.xlarge", output_path=batch_op
)

Creating model with name: iris-xgboost-2023-03-22-08-17-15-602


<IPython.core.display.Javascript object>

In [69]:
transformer.transform(
    data=batch_ip, data_type="S3Prefix", content_type="text/csv", split_type="Line"
)
transformer.wait()

Creating transform job with name: iris-xgboost-2023-03-22-08-17-22-760


.................................
[34mArguments: serve[0m
[34m[2023-03-22 08:22:53 +0000] [1] [INFO] Starting gunicorn 19.9.0[0m
[34m[2023-03-22 08:22:53 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[34m[2023-03-22 08:22:53 +0000] [1] [INFO] Using worker: gevent[0m
[34m[2023-03-22 08:22:53 +0000] [21] [INFO] Booting worker with pid: 21[0m
[34m[2023-03-22 08:22:53 +0000] [22] [INFO] Booting worker with pid: 22[0m
[35mArguments: serve[0m
[35m[2023-03-22 08:22:53 +0000] [1] [INFO] Starting gunicorn 19.9.0[0m
[35m[2023-03-22 08:22:53 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[35m[2023-03-22 08:22:53 +0000] [1] [INFO] Using worker: gevent[0m
[35m[2023-03-22 08:22:53 +0000] [21] [INFO] Booting worker with pid: 21[0m
[35m[2023-03-22 08:22:53 +0000] [22] [INFO] Booting worker with pid: 22[0m
[34m[2023-03-22 08:22:53 +0000] [23] [INFO] Booting worker with pid: 23[0m
[34m[2023-03-22 08:22:53 +0000] [24] [INFO] Booting worker with pid: 24

<IPython.core.display.Javascript object>

In [70]:
!aws s3 ls {bucket}/{prefix}/batch_transform/ --recursive

2023-02-03 13:29:08          0 iris/batch_transform/
2023-03-16 18:09:29        800 iris/batch_transform/iris_infer.csv
2023-03-22 19:22:58        200 iris/batch_transform/iris_infer.csv.out


<IPython.core.display.Javascript object>

In [74]:
!aws s3 cp s3://{bucket}/{prefix}/batch_transform/iris_infer.csv.out .

Completed 200 Bytes/200 Bytes (1.2 KiB/s) with 1 file(s) remainingdownload: s3://sgmkr-course/iris/batch_transform/iris_infer.csv.out to ./iris_infer.csv.out


<IPython.core.display.Javascript object>

In [76]:
!head -n 5 iris_infer.csv.out

2.0
1.0
0.0
1.0
2.0


<IPython.core.display.Javascript object>