In [28]:
import sagemaker
import boto3
import numpy as np                                
import pandas as pd                               
import os 
from sagemaker import tuner

# Build ONE boto3 session and reuse it for everything below, so the region,
# the low-level SageMaker client, and the SageMaker SDK session are guaranteed
# to share the same credentials and region (the original created a fresh
# boto3.Session() for each of them).
boto_session = boto3.Session()
region = boto_session.region_name
client = boto_session.client('sagemaker')
session = sagemaker.Session(boto_session=boto_session)
# Execution role of the current SageMaker environment; handed to the Estimator.
role = sagemaker.get_execution_role()

In [29]:
# Restore `s3_bucket_name` from IPython's %store database. The variable must
# have been saved earlier with `%store s3_bucket_name`; on a fresh environment
# this cell leaves the name undefined and the next line raises NameError.
%store -r s3_bucket_name
s3_bucket_name

'sagemaker-cookbook-bucket'

In [30]:
# Restore `prefix` (the S3 key prefix used for the input files below) from
# IPython's %store database; it must have been saved earlier with `%store prefix`.
%store -r prefix
prefix

'chapter06/input'

In [31]:
# All three input CSVs live under the same bucket/prefix; build that base once.
input_base_uri = f"s3://{s3_bucket_name}/{prefix}"

training_s3_input_location = f"{input_base_uri}/training_data_no_header.csv"
validation_s3_input_location = f"{input_base_uri}/validation_data_no_header.csv"
test_s3_input_location = f"{input_base_uri}/test_data_no_header.csv"

# Output location passed to the Estimator as `output_path`.
# Note that it sits at the bucket root, not under `prefix`.
training_s3_output_location = f"s3://{s3_bucket_name}/output/"

In [32]:
from sagemaker.image_uris import retrieve
# Look up the image URI of the built-in XGBoost container for the current
# region, pinned to version 0.90-2.
container = retrieve(
    framework='xgboost',
    region=region,
    version="0.90-2",
)
container

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:0.90-2-cpu-py3'

In [33]:
from sagemaker.inputs import TrainingInput
    
# Wrap the training and validation S3 URIs as input channels; both are CSV.
s3_input_training = TrainingInput(
    s3_data=training_s3_input_location,
    content_type="text/csv",
)
s3_input_validation = TrainingInput(
    s3_data=validation_s3_input_location,
    content_type="text/csv",
)

In [34]:
# Generic Estimator around the XGBoost container: a single ml.m5.large
# instance, writing its output under `training_s3_output_location`.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=training_s3_output_location,
    sagemaker_session=session,
)

In [35]:
# Hyperparameters set directly on the estimator (as opposed to the ranges
# handed to the tuner in `hyperparameter_ranges`).
static_hyperparameters = {
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'num_round': 50,
}
estimator.set_hyperparameters(**static_hyperparameters)

In [36]:
# Search space for the tuning job: two continuous ranges and one integer range.
# NOTE(review): the `eta` range includes 0 as its lower bound — confirm that
# is intended.
hyperparameter_ranges = dict(
    eta=tuner.ContinuousParameter(0, 1),
    min_child_weight=tuner.ContinuousParameter(3, 7),
    max_depth=tuner.IntegerParameter(2, 8),
)

In [37]:
# Metric the tuner optimizes; it pairs with eval_metric='auc' set on the
# estimator and is computed on the 'validation' channel.
objective_metric_name = 'validation:auc'

In [38]:
# Tuner over the estimator: up to 20 training jobs in total, at most 3
# running concurrently. The objective direction is not passed here; the
# describe output further down reports it as 'Maximize'.
hyperparameter_tuner = tuner.HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=20,
    max_parallel_jobs=3,
)

In [39]:
# Launch the tuning job on the train/validation channels. wait=False returns
# immediately instead of blocking the notebook, so the status is polled in
# the following cell.
hyperparameter_tuner.fit(
    inputs={
        'train': s3_input_training,
        'validation': s3_input_validation,
    },
    include_cls_metadata=False,
    wait=False,
)

In [40]:
# Name of the tuning job that the tuner launched most recently.
job_name = hyperparameter_tuner.latest_tuning_job.job_name

# Fetch the job description through the low-level boto3 SageMaker client.
describe_kwargs = {'HyperParameterTuningJobName': job_name}
response = client.describe_hyper_parameter_tuning_job(**describe_kwargs)

# Display the current job status as the cell output.
response['HyperParameterTuningJobStatus']

'InProgress'

In [41]:
# Pretty-print the full describe response for inspection.
# NOTE(review): the printed ARN contains the AWS account ID — consider
# clearing this output before sharing the notebook.
from pprint import pprint
pprint(response)

{'CreationTime': datetime.datetime(2021, 5, 21, 14, 8, 43, 808000, tzinfo=tzlocal()),
 'HyperParameterTuningJobArn': 'arn:aws:sagemaker:us-east-1:581320662326:hyper-parameter-tuning-job/sagemaker-xgboost-210521-1408',
 'HyperParameterTuningJobConfig': {'HyperParameterTuningJobObjective': {'MetricName': 'validation:auc',
                                                                        'Type': 'Maximize'},
                                   'ParameterRanges': {'CategoricalParameterRanges': [],
                                                       'ContinuousParameterRanges': [{'MaxValue': '1',
                                                                                      'MinValue': '0',
                                                                                      'Name': 'eta',
                                                                                      'ScalingType': 'Auto'},
                                                                               

In [43]:
# Take the job name from the describe response and persist it with %store so
# it can be restored later via `%store -r tuning_job_name`.
# NOTE(review): presumably equal to `job_name` from the earlier cell — confirm.
tuning_job_name = response['HyperParameterTuningJobName']
%store tuning_job_name
tuning_job_name

Stored 'tuning_job_name' (str)


'sagemaker-xgboost-210521-1408'