# UDACITY SageMaker Essentials: Training Job Demo

In [1]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker import image_uris
from sagemaker.predictor import csv_serializer

session = sagemaker.Session()

role = get_execution_role()

# If you're following along, you'll need to upload these datasets to your own bucket in S3. 

test_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/test.csv'
val_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/validation.csv'
train_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/train.csv'

# We use this prefix to help us determine where the output will go. 

prefix = 's3://sagemaker-us-west-2-565094796913/'

# We need to get the location of the container. 

container = image_uris.retrieve('xgboost', session.boto_region_name, version='latest')

# Now that we know which container to use, we can construct the estimator object.
xgb = sagemaker.estimator.Estimator(container, # The image name of the training container
                                    role,      # The IAM role to use (our current role in this case)
                                    instance_count=1, # The number of instances to use for training
                                    instance_type='ml.m4.xlarge', # The type of instance to use for training
                                    output_path='s3://{}/{}/output'.format(session.default_bucket(), prefix),
                                                                        # Where to save the output (the model artifacts)
                                    sagemaker_session=session) # The current SageMaker session
             
# These hyperparameters are beyond the scope of this course, but you can research the algoirthm here: 
# https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html    
    
xgb.set_hyperparameters(max_depth=5,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        objective='reg:linear',
                        early_stopping_rounds=10,
                        num_round=200)
                        
s3_input_train = sagemaker.inputs.TrainingInput(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.inputs.TrainingInput(s3_data=val_location, content_type='csv')

# The fit method launches the training job. 

xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})


ClientError: An error occurred (ValidationException) when calling the CreateTrainingJob operation: No S3 objects found under S3 URL "s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/train.csv" given in input data source. Please ensure that the bucket exists in the selected region (us-east-1), that objects exist under that S3 prefix, and that the role "arn:aws:iam::277186808238:role/service-role/AmazonSageMaker-ExecutionRole-20230103T170999" has "s3:ListBucket" permissions on bucket "sagemaker-us-west-2-565094796913". Error message from S3: The bucket is in this region: us-west-2. Please use this region to retry the request