# Performing Automatic Model Tuning with the SageMaker XGBoost Built-in Algorithm

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code for one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030). It launches an Automatic Model Tuning (hyperparameter tuning) job that uses the SageMaker built-in XGBoost algorithm.

### How to do it...

In [None]:
import sagemaker
import boto3
import numpy as np                                
import pandas as pd                               
import os 
from sagemaker import tuner

# Build ONE boto3 session and reuse it, so that the region name, the
# low-level SageMaker client and the SageMaker SDK session all come from the
# same credentials/region configuration instead of three independent sessions.
boto_session = boto3.Session()
region = boto_session.region_name
client = boto_session.client('sagemaker')

# High-level SageMaker SDK session (bound to the same boto3 session) and the
# execution role that is handed to the estimator further down.
session = sagemaker.Session(boto_session=boto_session)
role = sagemaker.get_execution_role()

In [None]:
# Restore `s3_bucket_name` from IPython's %store storage (it must have been
# saved with `%store s3_bucket_name` in an earlier notebook/kernel), then
# display it as the cell output so the reader can verify the value.
%store -r s3_bucket_name
s3_bucket_name

In [None]:
# Restore `prefix` from IPython's %store storage (saved earlier with
# `%store prefix`) and display it; it is used below as the S3 key prefix of
# the input CSV files.
%store -r prefix
prefix

In [None]:
# S3 URIs of the header-less CSV datasets, all located under
# s3://<s3_bucket_name>/<prefix>/.
training_s3_input_location = (
    f"s3://{s3_bucket_name}/{prefix}/training_data_no_header.csv"
)
validation_s3_input_location = (
    f"s3://{s3_bucket_name}/{prefix}/validation_data_no_header.csv"
)
test_s3_input_location = (
    f"s3://{s3_bucket_name}/{prefix}/test_data_no_header.csv"
)

# S3 URI passed to the estimator as its output path.
# NOTE(review): unlike the inputs, this path is not placed under `prefix` --
# confirm that writing to the bucket-level "output/" folder is intentional.
training_s3_output_location = (
    f"s3://{s3_bucket_name}/output/"
)

In [None]:
from sagemaker.image_uris import retrieve
# Resolve the image URI of the built-in XGBoost algorithm (version 0.90-2)
# for the region of the current session, and display it.
container = retrieve(
    framework='xgboost',
    region=region,
    version="0.90-2",
)
container

In [None]:
from sagemaker.inputs import TrainingInput
    
# Wrap the training and validation S3 locations as CSV input channels; they
# are handed to the tuner's fit() call as the 'train' and 'validation' inputs.
s3_input_training = TrainingInput(
    s3_data=training_s3_input_location,
    content_type="text/csv",
)
s3_input_validation = TrainingInput(
    s3_data=validation_s3_input_location,
    content_type="text/csv",
)

In [None]:
# Generic estimator around the XGBoost container: one ml.m5.large instance,
# output written to `training_s3_output_location`. The tuner below uses this
# estimator as the template for the training jobs it launches.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=training_s3_output_location,
    sagemaker_session=session,
)

In [None]:
# Hyperparameters that stay fixed for every training job of the tuning run;
# only the ranges defined in `hyperparameter_ranges` are searched.
fixed_hyperparameters = {
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'num_round': 50,
}
estimator.set_hyperparameters(**fixed_hyperparameters)

In [None]:
# Search space explored by the tuner: two continuous ranges and one integer
# range, keyed by the XGBoost hyperparameter name.
# NOTE(review): the `eta` range starts at 0, i.e. it includes a learning rate
# of exactly zero -- confirm whether a small positive lower bound is preferable.
hyperparameter_ranges = dict(
    eta=tuner.ContinuousParameter(0, 1),
    min_child_weight=tuner.ContinuousParameter(3, 7),
    max_depth=tuner.IntegerParameter(2, 8),
)

In [None]:
# Name of the metric the tuner optimises; it pairs with the 'auc' eval_metric
# set on the estimator and the 'validation' input channel.
objective_metric_name = "validation:auc"

In [None]:
# Tuner that launches up to 20 training jobs in total, at most 3 at a time,
# searching `hyperparameter_ranges` for the best `objective_metric_name`.
# NOTE(review): objective_type is left at the SDK default -- confirm that the
# default direction is "Maximize", which is what an AUC objective needs.
hyperparameter_tuner = tuner.HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=20,
    max_parallel_jobs=3,
)

In [None]:
# Start the tuning job with the two CSV channels. wait=False is passed so the
# call is expected to return without blocking until the job finishes; its
# status is polled in the next cell instead.
hyperparameter_tuner.fit(
    inputs={
        'train': s3_input_training,
        'validation': s3_input_validation,
    },
    include_cls_metadata=False,
    wait=False,
)

In [None]:
# Name of the tuning job that the fit() call above just started.
job_name = hyperparameter_tuner.latest_tuning_job.job_name

# Fetch the job description through the low-level boto3 SageMaker client.
response = client.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=job_name)

# Cell output: the status field of the description. Re-run this cell to
# refresh it while the job is in progress.
response['HyperParameterTuningJobStatus']

In [None]:
from pprint import pprint
# Pretty-print the full describe_hyper_parameter_tuning_job response fetched
# in the previous cell, for inspection.
pprint(response)

In [None]:
# Take the tuning job name from the describe response, persist it with
# IPython's %store so that another notebook/kernel can restore it via
# `%store -r tuning_job_name`, and display it as the cell output.
tuning_job_name = response['HyperParameterTuningJobName']
%store tuning_job_name
tuning_job_name