In [1]:
import sagemaker
from utils import get_secret
from toy_datasets import upload_dataset_to_s3

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# SageMaker session used by every SDK call in this notebook.
session = sagemaker.Session()

# Account-specific settings are looked up through the project's get_secret
# helper instead of being hard-coded in the notebook.
role, s3_bucket_uri, s3_bucket_name = (
    get_secret(secret_key)
    for secret_key in ('role_arn', 's3_bucket_uri', 's3_bucket_name')
)

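The XGBoost container versions available in each region are listed in the SageMaker Python SDK's image URI configuration: https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/xgboost.json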

In [3]:
# Resolve the SageMaker-managed XGBoost container image (version 1.5-1).
# The region is taken from the active session rather than a hard-coded
# literal, so the image is always looked up in the same region the training
# job and endpoint are created in.
image_uri = sagemaker.image_uris.retrieve(
    framework='xgboost',
    region=session.boto_region_name,
    version='1.5-1',
)

In [4]:
# Training output is written under this prefix of the project bucket.
model_output_path = f"{s3_bucket_uri}/pipelines-output"

# Generic Estimator wrapping the XGBoost container image; training runs on a
# single ml.m5.large instance using the configured role.
estimator = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_type="ml.m5.large",
    instance_count=1,
    output_path=model_output_path,
    sagemaker_session=session,
)

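The `role` passed to the estimator must be an AWS Identity and Access Management (IAM) role that grants Amazon SageMaker the permissions it needs to access resources such as S3 buckets, run training jobs, and deploy models. The policy below is an example; replace `your-s3-bucket` with the name of your own bucket.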

```json
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "s3:GetObject",
                    "s3:PutObject",
                    "s3:ListBucket",
                    "s3:DeleteObject"
                ],
                "Resource": [
                    "arn:aws:s3:::your-s3-bucket/*",
                    "arn:aws:s3:::your-s3-bucket"
                ]
            },
            {
                "Effect": "Allow",
                "Action": [
                    "sagemaker:CreateModel",
                    "sagemaker:CreateEndpointConfig",
                    "sagemaker:CreateEndpoint",
                    "sagemaker:DeleteEndpoint",
                    "sagemaker:DeleteEndpointConfig"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "sagemaker:CreateTrainingJob",
                    "sagemaker:DescribeTrainingJob",
                    "sagemaker:StopTrainingJob"
                ],
                "Resource": "*"
            }
        ]
    }
```


The hyperparameters accepted by the built-in XGBoost algorithm are documented here: https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost_hyperparameters.html

In [5]:
# XGBoost settings for a 3-class problem: multi:softmax makes the model emit
# the predicted class index directly.
xgb_hyperparameters = {
    'max_depth': 5,
    'objective': 'multi:softmax',
    'num_class': 3,
    'num_round': 10,
}
estimator.set_hyperparameters(**xgb_hyperparameters)

In [6]:
# Push the toy dataset to the project bucket using the project's helper.
dataset_name = "iris"
upload_dataset_to_s3(dataset_name, s3_bucket_name)

None
None
Files uploaded to S3 successfully.


In [7]:
from sagemaker.inputs import TrainingInput

# S3 locations of the CSV files consumed by the training job.
# Note: the validation channel reads test_data.csv.
train_uri = f's3://{s3_bucket_name}/iris_dataset/train_data.csv'
validation_uri = f's3://{s3_bucket_name}/iris_dataset/test_data.csv'

s3_train = TrainingInput(train_uri, content_type='csv')
s3_validate = TrainingInput(validation_uri, content_type='csv')

In [8]:
# Map each channel name to its S3 input, then launch the training job.
data_channels = {
    'train': s3_train,
    'validation': s3_validate,
}
estimator.fit(inputs=data_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-01-26-20-05-22-525


2024-01-26 20:05:22 Starting - Starting the training job...
2024-01-26 20:05:41 Starting - Preparing the instances for training.........
2024-01-26 20:06:57 Downloading - Downloading input data...
2024-01-26 20:07:52 Downloading - Downloading the training image......
2024-01-26 20:08:48 Training - Training image download completed. Training in progress.
  from pandas import MultiIndex, Int64Index[0m
[34m[2024-01-26 20:08:43.231 ip-10-0-162-240.ec2.internal:8 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-01-26 20:08:43.255 ip-10-0-162-240.ec2.internal:8 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2024-01-26:20:08:43:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-01-26:20:08:43:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[2024-01-26:20:08:43:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-01-26:20:08:43:INFO]

In [14]:
from sagemaker.serializers import CSVSerializer

# Host the trained model behind a real-time endpoint. CSVSerializer lets
# predict() accept a plain Python list of feature values.
# NOTE(review): the endpoint name is fixed, so re-running this cell while
# 'iris-endpoint' still exists will presumably fail; confirm, and delete the
# endpoint when finished.
endpoint_settings = dict(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name='iris-endpoint',
    serializer=CSVSerializer(),
)
predictor = estimator.deploy(**endpoint_settings)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-01-26-20-22-07-443
INFO:sagemaker:Creating endpoint-config with name iris-endpoint
INFO:sagemaker:Creating endpoint with name iris-endpoint


------------!

In [23]:
# One observation with four feature values; the response is the raw bytes
# payload returned by the endpoint.
sample = [7.2, 3, 6, 1.6]
predictor.predict(sample)

b'2.0\n'

In [26]:
# Same request as before, with the bytes response decoded and the trailing
# newline stripped to leave just the predicted class label.
raw_prediction = predictor.predict([7.2, 3, 6, 1.6])
raw_prediction.decode('utf-8').strip()

'2.0'