In [24]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.serializers import CSVSerializer
import boto3
import os

# Placeholders — replace all three before running the notebook.
# role:   IAM role handed to the SageMaker Estimator (needs permissions to train and create endpoints).
# bucket: S3 bucket that holds the training CSV, the model artifacts and the async inference output.
# prefix: key prefix ("folder") inside that bucket under which everything is stored.
role = "<YOUR_IAM_ROLE>"
bucket = "<YOUR_BUCKET_NAME>"
prefix = "<YOUR_FOLDER_NAME>"

## Download the dataset and upload it to S3

In [21]:
# Fetch the California Housing dataset and load the features into a DataFrame.
data = fetch_california_housing()
df = pd.DataFrame(data["data"], columns=data["feature_names"])

# SageMaker XGBoost expects the target column to be the first one,
# so place it at position 0, ahead of all feature columns.
df.insert(0, "Target", data["target"])

# Write the training file without a header row and without the index column.
df.to_csv("train.csv", header=False, index=False)

In [22]:
# S3 object keys always use "/" as the separator, so build the key explicitly
# instead of with os.path.join (which would produce "\" on Windows).
train_key = f"{prefix}/train.csv"
boto3.Session().resource("s3").Bucket(bucket).Object(train_key).upload_file("train.csv")

# Point the training channel at the uploaded file itself rather than at the whole
# prefix: the model artifacts and the async inference results are written under
# "<prefix>/output", and a channel rooted at "<prefix>" would pick those objects
# up as training data when the notebook is run again.
s3_input_train = TrainingInput(s3_data=f"s3://{bucket}/{train_key}", content_type="csv")

## Train the XGBoost model

In [None]:
# SageMaker session shared by the estimator below.
sess = sagemaker.Session()

# Resolve the built-in XGBoost training image for the current boto3 region.
# NOTE(review): "latest" is not a pinned version — consider pinning one for reproducibility.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=boto3.Session().region_name,
    version="latest",
)

# Single-instance training job; the model artifacts are written under "<prefix>/output".
xgb = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path="s3://{}/{}/output".format(bucket, prefix),
    sagemaker_session=sess,
)

# Hyperparameters forwarded to the XGBoost training container.
hyperparameters = {
    "max_depth": 5,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.8,
    "silent": 0,
    "num_round": 100,
}
xgb.set_hyperparameters(**hyperparameters)

# Start the training job using the CSV channel defined earlier as the "train" input.
xgb.fit({"train": s3_input_train})

## Create Real-Time Endpoint

In [25]:
# Deploy the trained model behind a single-instance real-time endpoint;
# request payloads are serialized as CSV.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    serializer=CSVSerializer(),
)

------!

In [26]:
# Score a single CSV row: eight feature values and no target column
# (presumably in the same order as the feature columns of train.csv — confirm against the dataset).
xgb_predictor.predict("8.3252,41.0,6.984126984126984,1.0238095238095237,322.0,2.5555555555555554,37.88,-122.23")

b'4.154237747192383'

In [54]:
# Clean up: delete the real-time endpoint once it is no longer needed.
xgb_predictor.delete_endpoint()

## Create Async Endpoint

In [35]:
from sagemaker.async_inference import AsyncInferenceConfig

# Async inference configuration: only the S3 output location is set explicitly,
# every other option is left at its default.
async_config = AsyncInferenceConfig(output_path=f"s3://{bucket}/{prefix}/output")

# Deploy the same trained model again, this time as an asynchronous endpoint
xgb_async_predictor = xgb.deploy(
    async_inference_config=async_config,
    initial_instance_count=1, # number of instances
    instance_type='ml.m4.xlarge', # instance type
    serializer=CSVSerializer(), # serialize request payloads as CSV
)

------!

In [50]:
# Submit one inline CSV payload to the async endpoint.
# Alternatively, you can provide the input_path parameter for predict_async with the S3 path for the input data.
response = xgb_async_predictor.predict_async("8.0,41.0,6.9,1.0,322.0,2.5,37.8,-122.2")
# S3 location of the result; the object may not exist yet, so it is polled for further down.
output_location = response.output_path

In [51]:
import time
from botocore.exceptions import ClientError
import boto3

def get_output(s3_client, output_path, timeout=None, poll_interval=2):
    """Poll S3 until the object at ``output_path`` exists and return its contents.

    Args:
        s3_client: Despite the name, this must expose the boto3 S3 *resource*
            interface, i.e. ``Object(bucket, key).get()`` (for example
            ``boto3.resource("s3")``).
        output_path: Location of the expected object as an
            ``s3://<bucket>/<key>`` URI.
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) waits indefinitely. At least one attempt is always made.
        poll_interval: Seconds to sleep between two attempts.

    Returns:
        The object body decoded as UTF-8 text.

    Raises:
        ValueError: If ``output_path`` is not an ``s3://<bucket>/<key>`` URI.
        TimeoutError: If the object has not appeared within ``timeout`` seconds.
        ClientError: Any S3 error other than ``NoSuchKey`` is propagated.
    """
    scheme = "s3://"
    if not output_path.startswith(scheme):
        raise ValueError(f"expected an s3://<bucket>/<key> URI, got {output_path!r}")
    output_bucket, _, output_key = output_path[len(scheme):].partition("/")
    if not output_bucket or not output_key:
        raise ValueError(f"expected an s3://<bucket>/<key> URI, got {output_path!r}")

    obj = s3_client.Object(output_bucket, output_key)
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        try:
            return obj.get()["Body"].read().decode("utf-8")
        except ClientError as e:
            # Only "object not there yet" is retried; everything else is a real error.
            if e.response["Error"]["Code"] != "NoSuchKey":
                raise
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"no output at {output_path} after waiting {timeout} seconds"
            )
        print("waiting for output...")
        time.sleep(poll_interval)

In [52]:
# get_output calls .Object(...) on its first argument, so pass an S3 *resource* (not a low-level client).
s3 = boto3.resource('s3')
# Polls S3 until the async inference result exists, then returns it as text.
output = get_output(s3, output_location)
print(f"Output: {output}")

Output: 4.112793445587158


In [53]:
# Clean up: delete the async endpoint once it is no longer needed.
xgb_async_predictor.delete_endpoint()

## Example Log

ModelLatency: 3047 us, RequestDownloadLatency: 18701 us, ResponseUploadLatency: 59713 us, TimeInBacklog: 5 ms, TotalProcessingTime: 94 ms