# XGBoost - Two different models
### Train bike rental dataset with different hyperparameters

SageMaker SDK Documentation: https://sagemaker.readthedocs.io/en/latest/overview.html

In [1]:
import numpy as np
import pandas as pd

import boto3
import re

import sagemaker
from sagemaker import get_execution_role

## Upload Data to S3

In [2]:
# Specify your bucket name
bucket_name = 'chandra-ml-sagemaker'

training_folder = r'bikerental-hyper/training/'
validation_folder = r'bikerental-hyper/validation/'
test_folder = r'bikerental-hyper/test/'

s3_model_output_location = r's3://{0}/bikerental-hyper/model'.format(bucket_name)
s3_training_file_location = r's3://{0}/{1}'.format(bucket_name,training_folder)
s3_validation_file_location = r's3://{0}/{1}'.format(bucket_name,validation_folder)
s3_test_file_location = r's3://{0}/{1}'.format(bucket_name,test_folder)

In [3]:
print(s3_model_output_location)
print(s3_training_file_location)
print(s3_validation_file_location)
print(s3_test_file_location)

s3://chandra-ml-sagemaker/bikerental-hyper/model
s3://chandra-ml-sagemaker/bikerental-hyper/training/
s3://chandra-ml-sagemaker/bikerental-hyper/validation/
s3://chandra-ml-sagemaker/bikerental-hyper/test/


In [4]:
# Data Channels for the training algorithm
# content type can be libsvm or csv for XGBoost
training_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_training_file_location,
    content_type='csv',
    s3_data_type='S3Prefix')

validation_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_validation_file_location,
    content_type='csv',
    s3_data_type='S3Prefix'
)

data_channels = {'train': training_input_config, 'validation': validation_input_config}

In [5]:
# Write and Reading from S3 is just as easy
# files are referred as objects in S3.  
# file name is referred as key name in S3

# File stored in S3 is automatically replicated across 3 different availability zones 
# in the region where the bucket was created.

# http://boto3.readthedocs.io/en/latest/guide/s3.html
def write_to_s3(filename, bucket, key):
    with open(filename,'rb') as f: # Read in binary mode
        return boto3.Session().resource('s3').Bucket(bucket).Object(key).upload_fileobj(f)

In [6]:
write_to_s3('bike_train.csv', 
            bucket_name,
            training_folder + 'bike_train.csv')

write_to_s3('bike_validation.csv',
            bucket_name,
            validation_folder + 'bike_validation.csv')

write_to_s3('bike_test.csv',
            bucket_name,
            test_folder + 'bike_test.csv')

## Training Algorithm Docker Image
### SageMaker maintains a separate image for algorithm and region
https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html

In [7]:
# Use Spot Instance - Save up to 90% of training cost by using spot instances when compared to on-demand instances
# Reference: https://github.com/aws-samples/amazon-sagemaker-managed-spot-training/blob/main/xgboost_built_in_managed_spot_training_checkpointing/xgboost_built_in_managed_spot_training_checkpointing.ipynb

# if you are still on two-month free-tier you can use the on-demand instance by setting:
#   use_spot_instances = False

# We will use spot for training
use_spot_instances = True
max_run = 3600 # in seconds
max_wait = 7200 if use_spot_instances else None # in seconds

In [8]:
def fit_model(jobSuffix, hyperParameters):
    # Establish a session with AWS
    sess = sagemaker.Session()
    # This role contains the permissions needed to train, deploy models    
    role = get_execution_role()
    
    # job name has the version details - replace "." with "-"
    job_name = f"xgboost-bikerental-{jobSuffix}"
    
    print (f"***Job name: {job_name}***")
    
    checkpoint_s3_uri = None

    if use_spot_instances:
        checkpoint_s3_uri = f's3://{bucket_name}/bikerental-hyper/checkpoints/{job_name}'

    print (f'Checkpoint uri: {checkpoint_s3_uri}')
            
    # Use the specified version of XGBoost
    container = sagemaker.image_uris.retrieve("xgboost",
                                              sess.boto_region_name,
                                              version="1.2-2")

    print (f'Using XGBoost Container {container}')
    
    # Configure the training job
    # for managed spot training, specify the use_spot_instances flag, max_run, max_wait and checkpoint_s3_uri
    estimator = sagemaker.estimator.Estimator(
        container,
        role,
        instance_count=1,
        instance_type='ml.m5.xlarge',
        output_path=s3_model_output_location,
        sagemaker_session=sess,
        base_job_name = job_name,
        hyperparameters = hyperParameters,
        use_spot_instances=use_spot_instances,
        max_run=max_run,
        max_wait=max_wait,
        checkpoint_s3_uri=checkpoint_s3_uri)

    estimator.fit(data_channels)

    return estimator.latest_training_job.name

In [9]:
# XGBoost hyperparameters
hyperParametersOne = {"objective":"reg:squarederror",                   
                      "eta":0.1,
                      "max_depth":5,
                      "num_round":150}

# Adjust depth and round
hyperParametersTwo = {"objective":"reg:squarederror",
                      "eta":0.1,
                      "max_depth":7,
                      "num_round":200}

In [10]:
fit_model(jobSuffix="hyper-one", hyperParameters=hyperParametersOne)

***Job name: xgboost-bikerental-hyper-one***
Checkpoint uri: s3://chandra-ml-sagemaker/bikerental-hyper/checkpoints/xgboost-bikerental-hyper-one
Using XGBoost Container 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-2
2021-06-29 19:03:52 Starting - Starting the training job...
2021-06-29 19:04:15 Starting - Launching requested ML instancesProfilerReport-1624993431: InProgress
......
2021-06-29 19:05:16 Starting - Preparing the instances for training.........
2021-06-29 19:06:36 Downloading - Downloading input data...
2021-06-29 19:07:16 Training - Training image download completed. Training in progress.
2021-06-29 19:07:16 Uploading - Uploading generated training model[34m[2021-06-29 19:07:11.405 ip-10-2-232-112.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2021-06-29:19:07:11:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2021-06-29:19:07:11:INFO] Failed to parse hyperparameter objective value reg:squareder


2021-06-29 19:07:36 Completed - Training job completed
Training seconds: 51
Billable seconds: 18
Managed Spot Training savings: 64.7%


'xgboost-bikerental-hyper-one-2021-06-29-19-03-51-772'

In [11]:
fit_model(jobSuffix="hyper-two", hyperParameters=hyperParametersTwo)

***Job name: xgboost-bikerental-hyper-two***
Checkpoint uri: s3://chandra-ml-sagemaker/bikerental-hyper/checkpoints/xgboost-bikerental-hyper-two
Using XGBoost Container 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-2
2021-06-29 19:08:04 Starting - Starting the training job...
2021-06-29 19:08:29 Starting - Launching requested ML instancesProfilerReport-1624993684: InProgress
......
2021-06-29 19:09:30 Starting - Preparing the instances for training.........
2021-06-29 19:10:50 Downloading - Downloading input data...
2021-06-29 19:11:31 Training - Training image download completed. Training in progress..[34m[2021-06-29 19:11:31.598 ip-10-0-175-221.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2021-06-29:19:11:31:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2021-06-29:19:11:31:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2021-06


2021-06-29 19:11:50 Uploading - Uploading generated training model
2021-06-29 19:11:50 Completed - Training job completed
Training seconds: 62
Billable seconds: 22
Managed Spot Training savings: 64.5%


'xgboost-bikerental-hyper-two-2021-06-29-19-08-04-688'