In [75]:
import boto3
import sagemaker

In [76]:
from sagemaker.session import TrainingInput
from sagemaker import image_uris
from sagemaker import hyperparameters

In [77]:
import logging

In [78]:
boto3.set_stream_logger(name='botocore.credentials', level=logging.WARNING)

In [79]:
region = sagemaker.Session().boto_region_name
print(region)

us-east-1


In [80]:
import os

In [81]:
# role_arn = os.getenv('SGMKR_ROLE_ARN')
role_arn = sagemaker.get_execution_role()

In [82]:
print(role_arn)

arn:aws:iam::216393269487:role/service-role/AmazonSageMaker-ExecutionRole-20240925T153897


In [113]:
bucket = 'sagemakerlearningwithiris'
prefix = 'iris'

In [114]:
#Get the ls of objects in the Bucket with name bucket and prefix
!aws s3 ls {bucket}/{prefix}/

                           PRE batch_transform/
                           PRE data/


In [115]:
!aws s3 ls {bucket}/{prefix}/data/ --recursive

2024-09-25 19:10:22        540 iris/data/iris_test.csv
2024-09-25 19:10:21       2160 iris/data/iris_train.csv


In [116]:
train_file = 'data/iris_train.csv'
validation_file = 'data/iris_test.csv'

train_file_uri = 's3://{}/{}/{}'.format(bucket, prefix, train_file)
test_file_uri = 's3://{}/{}/{}'.format(bucket, prefix, validation_file)
print(train_file_uri)
print(test_file_uri)

s3://sagemakerlearningwithiris/iris/data/iris_train.csv
s3://sagemakerlearningwithiris/iris/data/iris_test.csv


In [117]:
train_ip = TrainingInput(train_file_uri, content_type='csv')
test_ip = TrainingInput(test_file_uri, content_type='csv')
print(train_ip)
print(test_ip)

<sagemaker.inputs.TrainingInput object at 0x7fa28e904670>
<sagemaker.inputs.TrainingInput object at 0x7fa28e904580>


In [118]:
model_op = 's3://{}/{}/{}'.format(bucket, prefix, 'model')
print(model_op)

s3://sagemakerlearningwithiris/iris/model


In [97]:
model_img = sagemaker.image_uris.retrieve('xgboost', region, 'latest')
print(model_img)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest


In [119]:
job_name = 'iris-xgboost'
xgb_model = sagemaker.estimator.Estimator(
    image_uri=model_img,
    role = role_arn,
    base_job_name=job_name,
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    output_path=model_op,
    sagemaker_session=sagemaker.Session(),
    volume_size=5
)

In [121]:
# xgb_model.set_hyperparameters(num_class=3, max_depth=5, num_round=10, objective='multi:softmax')
# xgb_model.set_hyperparameters(num_class=3, max_depth=5, num_round=10, objective='multi:softmax')
xgb_model.set_hyperparameters(num_class=3, max_depth=5, num_round=10, objective='multi:softmax')


In [122]:
from datetime import datetime
name_job = job_name + datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
print(name_job)

iris-xgboost2024-09-25-19-15-18


In [123]:
xgb_model.fit({"train":train_ip, "validation":test_ip}, wait=True, job_name=name_job)

INFO:sagemaker:Creating training-job with name: iris-xgboost2024-09-25-19-15-18


2024-09-25 19:15:25 Starting - Starting the training job...
2024-09-25 19:15:39 Starting - Preparing the instances for training...
2024-09-25 19:16:17 Downloading - Downloading input data...
2024-09-25 19:16:37 Downloading - Downloading the training image...
2024-09-25 19:17:07 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[2024-09-25:19:17:26:INFO] Running standalone xgboost training.[0m
[34m[2024-09-25:19:17:26:INFO] File size need to be processed in the node: 0.0mb. Available memory size in the node: 55450.48mb[0m
[34m[2024-09-25:19:17:26:INFO] Determined delimiter of CSV input is ','[0m
[34m[19:17:26] S3DistributionType set as FullyReplicated[0m
[34m[19:17:26] 120x4 matrix with 480 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2024-09-25:19:17:26:INFO] Determined delimiter of CSV input is ','[0m
[34m[19:17:26] S3DistributionType set as FullyReplicated[0m
[34m[19:17:26] 30

In [124]:
!aws s3 ls s3://{bucket}/{prefix}/model/

                           PRE iris-xgboost2024-09-25-19-02-02/
                           PRE iris-xgboost2024-09-25-19-15-18/


In [127]:
!aws s3 ls s3://{bucket}/{prefix}/model/{name_job}/

                           PRE debug-output/
                           PRE output/
                           PRE profiler-output/
