In [4]:
import sagemaker
import boto3
from sagemaker.image_uris import retrieve
from sagemaker.inputs import TrainingInput
from sagemaker import image_uris

# SageMaker session plus the IAM role that training jobs will run under.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# S3 bucket used by this notebook.
bucket = "ads-508-team4"
# NOTE(review): `prefix` is reassigned further down in this same cell before
# anything reads it, so this initial value appears to be unused -- confirm.
prefix = "autopilot"

# Build a single boto3 session and derive the region and both service clients
# from it, instead of constructing a fresh boto3.Session() for each lookup.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name="sagemaker", region_name=region)
s3 = boto_session.client(service_name="s3", region_name=region)

# Hyperparameters handed to the XGBoost training container.
# Every value is deliberately kept as a string, exactly as it is sent to the job.
hyperparameters = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.7",
    objective="reg:squarederror",
    num_round="50",
)

# S3 location where the trained model artifacts will be saved.
bucket = "ads-508-team4"
# NOTE(review): `prefix` is not part of the output path below; it is kept
# because it is a notebook-level variable -- confirm whether it is still needed.
prefix = 'xgboost_builtin_algorithm'
# The original called .format(bucket, prefix, ...) on a string that had no
# placeholders, so the arguments were silently ignored. The bucket is now
# actually substituted; the resulting path is the same as before.
output_path = 's3://{}/xgboost/output'.format(bucket)

# Retrieving the XGBoost Container Image

In [6]:
# Resolve the image URI of the built-in XGBoost container (version 1.2-2)
# for the current region, then echo it so the chosen image is visible.
xgboost_container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-2",
)
print(xgboost_container)

683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-2


# SageMaker Estimator to Call xgboost_container

In [7]:
# Estimator that runs the XGBoost container image with the hyperparameters
# and output location defined in the earlier cells.
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    # Reuse the execution role already resolved in the setup cell rather than
    # calling sagemaker.get_execution_role() a second time.
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    volume_size=5,  # 5 GB
    output_path=output_path,
)

# Defining data types and paths for training and validation datasets

In [8]:
# Both datasets are CSV files stored in the project bucket.
content_type = "csv"

# The original strings had no placeholders, so their .format(...) arguments
# were silently discarded. The bucket is now actually substituted; the
# resulting S3 URIs are the same as before.
train_input = TrainingInput(
    "s3://{}/modeling_groups/train_data.csv".format(bucket),
    content_type=content_type,
)
validation_input = TrainingInput(
    "s3://{}/modeling_groups/validate_data.csv".format(bucket),
    content_type=content_type,
)

# Execute the XGBoost Training Job

In [9]:
# Map each input channel name to its dataset, then launch the training job.
data_channels = {
    "train": train_input,
    "validation": validation_input,
}
estimator.fit(inputs=data_channels)



2022-03-29 06:49:07 Starting - Starting the training job...
2022-03-29 06:49:31 Starting - Preparing the instances for trainingProfilerReport-1648536547: InProgress
......
2022-03-29 06:50:37 Downloading - Downloading input data......
2022-03-29 06:51:38 Training - Downloading the training image......
2022-03-29 06:52:34 Training - Training image download completed. Training in progress..[34m[2022-03-29 06:52:37.971 ip-10-0-229-210.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-03-29:06:52:38:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-03-29:06:52:38:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2022-03-29:06:52:38:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-03-29:06:52:38:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2022-03-29:06:52:38:INFO] Determined delimiter of CSV input is ','[0m
[34m[2022-03

In [10]:
# Adapted from the AWS SageMaker XGBoost documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html

# Shutting down resources

In [11]:
%%html

<!-- Renders a notice plus a hidden button bound to the "kernelmenu:shutdown"
     command; the script below clicks that button programmatically. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    // Declared with const so the lookup result stays local to this block
    // instead of leaking into the page as an implicit global variable.
    const els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp: best-effort only; any failure to find or click the button is ignored.
}    
</script>

In [None]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}