In [1]:

# Setup clients
import boto3
client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")

import sagemaker
from sagemaker.estimator import Estimator

boto_session = boto3.session.Session()
region = boto_session.region_name
print(region)

sagemaker_session = sagemaker.Session()
base_job_prefix = "xgboost-example"
role = sagemaker.get_execution_role()
print(role)

default_bucket = sagemaker_session.default_bucket()
s3_prefix = base_job_prefix

training_instance_type = "ml.m5.xlarge" 

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
ap-south-1
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
arn:aws:iam::740617167239:role/service-role/AmazonSageMaker-ExecutionRole-20240115T163460


In [2]:
# retrieve data
! curl https://sagemaker-sample-files.s3.amazonaws.com/datasets/tabular/uci_abalone/train_csv/abalone_dataset1_train.csv > abalone_dataset1_train.csv
  
 # upload data to S3
!aws s3 cp abalone_dataset1_train.csv s3://{default_bucket}/xgboost-regression/train.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  131k  100  131k    0     0  98977      0  0:00:01  0:00:01 --:--:-- 99056
upload: ./abalone_dataset1_train.csv to s3://sagemaker-ap-south-1-740617167239/xgboost-regression/train.csv


In [3]:
from sagemaker.inputs import TrainingInput

training_path = f"s3://{default_bucket}/xgboost-regression/train.csv"
train_input = TrainingInput(training_path, content_type="text/csv")

model_path = f"s3://{default_bucket}/{s3_prefix}/xgb_model"

# retrieve xgboost image
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type=training_instance_type,
)

# Configure Training Estimator
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=1,
    output_path=model_path,
    sagemaker_session=sagemaker_session,
    role=role,
)

# Set Hyperparameters
xgb_train.set_hyperparameters(
    objective="reg:linear",
    num_round=50,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
    silent=0,
)


In [15]:
# Fit model
xgb_train.fit({"train": train_input})

# Retrieve model data from training job
model_artifacts = xgb_train.model_data
model_artifacts

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-01-30-10-05-36-978


2024-01-30 10:05:37 Starting - Starting the training job...
2024-01-30 10:05:51 Starting - Preparing the instances for training......
2024-01-30 10:06:44 Downloading - Downloading input data...
2024-01-30 10:07:13 Downloading - Downloading the training image..[34m[2024-01-30 10:07:46.971 ip-10-0-158-81.ap-south-1.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:linear to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Single node training.[0m
[34m[10:07:47] 2923x8 matrix with 23384

's3://sagemaker-ap-south-1-740617167239/xgboost-example/xgb_model/sagemaker-xgboost-2024-01-30-10-05-36-978/output/model.tar.gz'

In [5]:
import time
model_package_group_name = "xgboost-abalone-realtime"
model_package_group_input_dict = {
 "ModelPackageGroupName" : model_package_group_name,
 "ModelPackageGroupDescription" : "Model package group for xgboost regression model with Abalone dataset"
}

create_model_pacakge_group_response = client.create_model_package_group(**model_package_group_input_dict)
print('ModelPackageGroup Arn : {}'.format(create_model_pacakge_group_response['ModelPackageGroupArn']))


ModelPackageGroup Arn : arn:aws:sagemaker:ap-south-1:740617167239:model-package-group/xgboost-abalone-realtime


## TO Create Model Versions

In [17]:



model_package_group_arn = create_model_pacakge_group_response['ModelPackageGroupArn']
modelpackage_inference_specification =  {
    "InferenceSpecification": {
        "Containers": [
            {
                "Image": image_uri,
            }
        ],
        "SupportedContentTypes": [ "text/csv" ],
        "SupportedResponseMIMETypes": [ "text/csv" ],
    }
}
 
# Specify the model source
model_url = model_artifacts

# Specify the model data
modelpackage_inference_specification["InferenceSpecification"]["Containers"][0]["ModelDataUrl"]=model_url

create_model_package_input_dict = {
    "ModelPackageGroupName" : model_package_group_arn,
    "ModelPackageDescription" : "version 3",
    "ModelApprovalStatus" : "PendingManualApproval"
}
create_model_package_input_dict.update(modelpackage_inference_specification)

# Create cross-account model package
create_mode_package_response = client.create_model_package(**create_model_package_input_dict)
model_package_arn = create_mode_package_response["ModelPackageArn"]
print('ModelPackage Version ARN : {}'.format(model_package_arn))

ModelPackage Version ARN : arn:aws:sagemaker:ap-south-1:740617167239:model-package/xgboost-abalone-realtime/3


In [20]:
model_package_arn = client.list_model_packages(ModelPackageGroupName=model_package_group_name)['ModelPackageSummaryList']
model_package_arn[0]['ModelPackageArn']

{'ModelPackageGroupName': 'xgboost-abalone-realtime',
 'ModelPackageVersion': 3,
 'ModelPackageArn': 'arn:aws:sagemaker:ap-south-1:740617167239:model-package/xgboost-abalone-realtime/3',
 'ModelPackageDescription': 'version 3',
 'CreationTime': datetime.datetime(2024, 1, 30, 10, 12, 16, 302000, tzinfo=tzlocal()),
 'ModelPackageStatus': 'Completed',
 'ModelApprovalStatus': 'PendingManualApproval'}

In [8]:

client.describe_model_package(ModelPackageName=model_package_arn)

{'ModelPackageGroupName': 'xgboost-abalone-realtime',
 'ModelPackageVersion': 1,
 'ModelPackageArn': 'arn:aws:sagemaker:ap-south-1:740617167239:model-package/xgboost-abalone-realtime/1',
 'ModelPackageDescription': 'Model for regression with the Abalone dataset',
 'CreationTime': datetime.datetime(2024, 1, 29, 11, 40, 29, 16000, tzinfo=tzlocal()),
 'InferenceSpecification': {'Containers': [{'Image': '720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',
    'ImageDigest': 'sha256:7f2851af46434a0e13153f9415c925b653088f15753ab4cd85682fdf426c3924',
    'ModelDataUrl': 's3://sagemaker-ap-south-1-740617167239/xgboost-example/xgb_model/sagemaker-xgboost-2024-01-29-11-34-54-773/output/model.tar.gz'}],
  'SupportedContentTypes': ['text/csv'],
  'SupportedResponseMIMETypes': ['text/csv']},
 'ModelPackageStatus': 'Completed',
 'ModelPackageStatusDetails': {'ValidationStatuses': [],
  'ImageScanStatuses': []},
 'CertifyForMarketplace': False,
 'ModelApprovalStatus': 'Pen

In [9]:
model_package_update_input_dict = {
    "ModelPackageArn" : model_package_arn,
    "ModelApprovalStatus" : "Approved"
}
model_package_update_response = client.update_model_package(**model_package_update_input_dict)
print(model_package_update_response)

{'ModelPackageArn': 'arn:aws:sagemaker:ap-south-1:740617167239:model-package/xgboost-abalone-realtime/1', 'ResponseMetadata': {'RequestId': '3d48f63d-4324-4761-8c99-5d07a734a1e1', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '3d48f63d-4324-4761-8c99-5d07a734a1e1', 'content-type': 'application/x-amz-json-1.1', 'content-length': '104', 'date': 'Mon, 29 Jan 2024 11:42:32 GMT'}, 'RetryAttempts': 0}}


In [10]:
from time import gmtime, strftime
model_name = 'xgboost-regression-model' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("Model name : {}".format(model_name))
container_list = [{'ModelPackageName': model_package_arn}]

create_model_response = client.create_model(
    ModelName = model_name,
    ExecutionRoleArn = role,
    Containers = container_list
)
print("Model arn : {}".format(create_model_response["ModelArn"]))

Model name : xgboost-regression-model2024-01-29-11-44-02
Model arn : arn:aws:sagemaker:ap-south-1:740617167239:model/xgboost-regression-model2024-01-29-11-44-02


In [12]:
endpoint_config_name = 'xgboost-regression-epc' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
instance_type = "ml.c5.xlarge"
print(endpoint_config_name)
create_endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName = endpoint_config_name,
    ProductionVariants=[{
        'InstanceType': instance_type,
        'InitialInstanceCount': 1,
        'InitialVariantWeight': 1,
        'ModelName': model_name,
        'VariantName': 'AllTraffic'}])
print("Endpoint Configuration Arn: " + create_endpoint_config_response["EndpointConfigArn"])

xgboost-regression-epc2024-01-29-11-45-03
Endpoint Configuration Arn: arn:aws:sagemaker:ap-south-1:740617167239:endpoint-config/xgboost-regression-epc2024-01-29-11-45-03


In [13]:
endpoint_name = 'xgboost-realtime-ep' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("EndpointName={}".format(endpoint_name))

create_endpoint_response = client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

# wait for endpoint to reach a terminal state (InService) using describe endpoint
import time

describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)

while describe_endpoint_response["EndpointStatus"] == "Creating":
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])
    time.sleep(15)

describe_endpoint_response

EndpointName=xgboost-realtime-ep2024-01-29-11-45-09
arn:aws:sagemaker:ap-south-1:740617167239:endpoint/xgboost-realtime-ep2024-01-29-11-45-09
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
InService


{'EndpointName': 'xgboost-realtime-ep2024-01-29-11-45-09',
 'EndpointArn': 'arn:aws:sagemaker:ap-south-1:740617167239:endpoint/xgboost-realtime-ep2024-01-29-11-45-09',
 'EndpointConfigName': 'xgboost-regression-epc2024-01-29-11-45-03',
 'ProductionVariants': [{'VariantName': 'AllTraffic',
   'DeployedImages': [{'SpecifiedImage': '720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-xgboost@sha256:7f2851af46434a0e13153f9415c925b653088f15753ab4cd85682fdf426c3924',
     'ResolvedImage': '720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-xgboost@sha256:7f2851af46434a0e13153f9415c925b653088f15753ab4cd85682fdf426c3924',
     'ResolutionTime': datetime.datetime(2024, 1, 29, 11, 45, 10, 64000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 1,
   'DesiredInstanceCount': 1}],
 'EndpointStatus': 'InService',
 'CreationTime': datetime.datetime(2024, 1, 29, 11, 45, 9, 697000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 

In [14]:

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=b".345,0.224414,.131102,0.042329,.279923,-0.110329,-0.099358,0.0",
    ContentType="text/csv",
)

print(response["Body"].read())

b'4.566554546356201'
