# Tuning for Performance

In this unit, we focus on enhancing machine learning models with SageMaker's **Hyperparameter Tuning**. Learn how to fine-tune your model's settings for better accuracy and integrate these optimizations into your pipelines for seamless automation. Simplified and practical, this module will equip you with the skills to improve model performance efficiently.

We begin by loading the usual setup:

In [1]:
import boto3
import sagemaker
from utils.helpers import get_secret

# Shared client and session objects reused by later cells.
sm_client = boto3.client('sagemaker')
session = sagemaker.Session()

# Account-specific values are read through the project's `get_secret` helper
# rather than being hard-coded in the notebook.
role = get_secret('role_arn')
s3_bucket_uri = get_secret('s3_bucket_uri')
s3_bucket_name = get_secret('s3_bucket_name')

# Resolve the XGBoost container for the region the session is bound to, so the
# image is always looked up in the same region the training jobs run in
# (previously this was pinned to 'us-east-1').
image_uri = sagemaker.image_uris.retrieve(framework='xgboost',
                                          region=session.boto_region_name,
                                          version='1.5-1')

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# Fixed (non-tuned) XGBoost settings: 3-class softmax, 10 boosting rounds.
xgb_hyperparameters = {
    'objective': 'multi:softmax',
    'num_class': 3,
    'num_round': 10,
}

# Estimator that is handed to the HyperparameterTuner further down.
estimator = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    sagemaker_session=session,
    instance_type="ml.m5.large",
    instance_count=1,
    hyperparameters=xgb_hyperparameters,
    output_path=f"{s3_bucket_uri}/pipelines-output",
)

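Each parameter range takes a `scaling_type`, which sets the scale used to search the range. See [Scaling type](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-define-ranges.html#scaling-type) in the AWS documentation for the available options.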

In [3]:
from sagemaker.tuner import IntegerParameter, HyperparameterTuner

# Search space: only `max_depth` is tuned, over the integers 1 through 10.
max_depth_range = IntegerParameter(min_value=1, max_value=10, scaling_type='Auto')

hyperparameter_ranges = {'max_depth': max_depth_range}

A list of metrics for the XGBoost model can be found [here (AWS Docs)](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html#xgboost-metrics) and [here (GitHub)](https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst#learning-task-parameters).

In [4]:
# Random search over `hyperparameter_ranges`, minimising the
# 'validation:merror' metric reported by the training jobs.
hyperparameter_tuner = HyperparameterTuner(
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_metric_name='validation:merror',
    objective_type='Minimize',
    strategy='Random',
    max_jobs=10,          # total number of training jobs
    max_parallel_jobs=5,  # upper bound on jobs running at the same time
)

from sagemaker.inputs import TrainingInput

# S3 locations of the two CSV files used as input channels.
train_data_uri = f's3://{s3_bucket_name}/iris_dataset/train_data.csv'
validation_data_uri = f's3://{s3_bucket_name}/iris_dataset/test_data.csv'

s3_train = TrainingInput(train_data_uri, content_type='csv')

# Note: the validation channel is fed from test_data.csv.
s3_validate = TrainingInput(validation_data_uri, content_type='csv')

In [5]:
# Start the tuning job with one input per channel name.
tuning_channels = {'train': s3_train, 'validation': s3_validate}
hyperparameter_tuner.fit(inputs=tuning_channels)

No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


...........................................................!


In [6]:
from sagemaker.analytics import HyperparameterTuningJobAnalytics

# Take the job name from the tuner that was just fitted instead of pasting a
# name from an earlier run, so this cell still works after Restart & Run All.
tuning_job_name = hyperparameter_tuner.latest_tuning_job.name

tuning_job_result = HyperparameterTuningJobAnalytics(tuning_job_name, sagemaker_session=session)

# One summary dict per training job launched by the tuning job.
job_summaries = tuning_job_result.training_job_summaries()

# Show which fields each summary carries.
job_summaries[0].keys()

dict_keys(['TrainingJobName', 'TrainingJobArn', 'CreationTime', 'TrainingStartTime', 'TrainingEndTime', 'TrainingJobStatus', 'TunedHyperParameters', 'FinalHyperParameterTuningJobObjectiveMetric', 'ObjectiveStatus'])

In [7]:
# Tabular view of the tuning job: one row per training job.
tuning_results_df = tuning_job_result.dataframe()
tuning_results_df

Unnamed: 0,max_depth,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,4.0,sagemaker-xgboost-240202-2315-010-64815582,Completed,0.03226,2024-02-02 23:19:34-05:00,2024-02-02 23:20:15-05:00,41.0
1,1.0,sagemaker-xgboost-240202-2315-009-784b844a,Completed,0.06452,2024-02-02 23:19:32-05:00,2024-02-02 23:20:14-05:00,42.0
2,3.0,sagemaker-xgboost-240202-2315-008-f1f79461,Completed,0.06452,2024-02-02 23:19:27-05:00,2024-02-02 23:20:04-05:00,37.0
3,5.0,sagemaker-xgboost-240202-2315-007-6288d9c9,Completed,0.03226,2024-02-02 23:19:23-05:00,2024-02-02 23:20:05-05:00,42.0
4,7.0,sagemaker-xgboost-240202-2315-006-40a4723f,Completed,0.03226,2024-02-02 23:19:06-05:00,2024-02-02 23:19:43-05:00,37.0
5,9.0,sagemaker-xgboost-240202-2315-005-69134c8a,Completed,0.03226,2024-02-02 23:17:20-05:00,2024-02-02 23:19:22-05:00,122.0
6,8.0,sagemaker-xgboost-240202-2315-004-8862d949,Completed,0.03226,2024-02-02 23:17:11-05:00,2024-02-02 23:19:13-05:00,122.0
7,6.0,sagemaker-xgboost-240202-2315-003-8ba6b805,Completed,0.03226,2024-02-02 23:17:22-05:00,2024-02-02 23:19:24-05:00,122.0
8,2.0,sagemaker-xgboost-240202-2315-002-d283f75d,Completed,0.03226,2024-02-02 23:17:16-05:00,2024-02-02 23:19:18-05:00,122.0
9,10.0,sagemaker-xgboost-240202-2315-001-e3113228,Completed,0.03226,2024-02-02 23:17:05-05:00,2024-02-02 23:18:57-05:00,112.0


In [8]:
from sagemaker.workflow.pipeline_context import PipelineSession

# Session used for the pipeline definition; it is passed as `sagemaker_session`
# to the estimator and the model in the cells that follow.
# NOTE(review): with this session, calls such as `fit()` / `register()` are
# expected to return step arguments instead of starting jobs — confirm against
# the SageMaker SDK documentation.
pipeline_session = PipelineSession()

In [9]:
from sagemaker.workflow.parameters import (
    ParameterString,
    ParameterInteger
)

# Pipeline parameters, each with the default value used when none is supplied.
objective_param = ParameterString(
    name="objective",
    default_value='multi:softmax',
)
num_round_param = ParameterInteger(
    name="num_round",
    default_value=10,
)

# Hyperparameters for the pipeline estimator: `objective` and `num_round` come
# from the pipeline parameters above, `num_class` stays fixed.
pipeline_hyperparameters = {
    'objective': objective_param,
    'num_class': 3,
    'num_round': num_round_param,
}

# Rebinds `estimator`, this time attached to the pipeline session.
estimator = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    sagemaker_session=pipeline_session,
    instance_type="ml.m5.large",
    instance_count=1,
    hyperparameters=pipeline_hyperparameters,
    output_path=f"{s3_bucket_uri}/pipelines-output",
)

# Rebinds `hyperparameter_tuner` around the pipeline estimator.
hyperparameter_tuner = HyperparameterTuner(
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_metric_name='validation:merror',
    objective_type='Minimize',
    strategy='Random',
    max_jobs=10,
    max_parallel_jobs=10,
)

In [10]:
from sagemaker.workflow.steps import TuningStep

# The result of the tuner's `fit(...)` call is handed to the step as `step_args`.
tuning_step_args = hyperparameter_tuner.fit(
    inputs={'train': s3_train, 'validation': s3_validate}
)

tuning_step = TuningStep(name='tuning-step', step_args=tuning_step_args)



In [11]:
from sagemaker.predictor import Predictor
from sagemaker.model import Model
from sagemaker.workflow.model_step import ModelStep

# Model built from the artifact of the top-ranked training job (top_k=0) of the
# tuning step. The bucket and the key prefix are passed separately through the
# dedicated `s3_bucket` / `prefix` arguments instead of packing the prefix into
# the bucket name; the resulting S3 URI is unchanged.
# NOTE(review): this assumes the estimator's output_path
# (f"{s3_bucket_uri}/pipelines-output") points at the same bucket as
# `s3_bucket_name` — confirm against the stored secrets.
best_model = Model(
    image_uri=image_uri,
    model_data=tuning_step.get_top_model_s3_uri(
        top_k=0,
        s3_bucket=s3_bucket_name,
        prefix="pipelines-output",
    ),
    predictor_cls=Predictor,
    sagemaker_session=pipeline_session,
    role=role,
)

# Arguments forwarded to `Model.register`.
register_params = {
    'model_package_group_name': 'iris-classification-group',
    'description': 'Top performing model from Hyperparameter tuning',
    'image_uri': image_uri,
    'task': 'CLASSIFICATION'
}

# Pipeline step that registers the best model in the model package group.
register_best_model_step = ModelStep(
    name="register-best-step",
    step_args=best_model.register(**register_params),
)


In [None]:
from sagemaker.workflow.pipeline import Pipeline

# Pipeline made of the tuning step followed by registration of the best model.
pipeline = Pipeline(
    name='iris-tuning-pipeline',
    steps=[tuning_step,
           register_best_model_step],
    parameters=[
        objective_param,
        num_round_param,
    ]
)

# Upper bound on the number of steps executing in parallel.
parallelism_config = {
    'MaxParallelExecutionSteps': 5
}

# `upsert` creates the pipeline, or updates it when one with this name already
# exists, so the cell can be re-run without failing (plain `create` raises on
# an existing pipeline).
pipeline.upsert(
    role_arn=role,
    parallelism_config=parallelism_config,
    tags=[
        {'Key': 'Project', 'Value': 'AWSTutorials'},
        {'Key': 'Environment', 'Value': 'Development'}
    ]
)

# Kick off an execution using the parameter defaults.
pipeline.start(
    execution_display_name='pipeline-execution',
    execution_description='First Pipeline Execution'
)

## Cleanup

In [13]:
# Names of the pipelines created in this notebook.
pipeline_names = ['iris-tuning-pipeline']

# Delete pipelines
for name_to_delete in pipeline_names:
    Pipeline(name_to_delete, sagemaker_session=session).delete()

INFO:sagemaker.workflow.pipeline:If triggers have been setup for this target, they will become orphaned.You will need to clean them up manually via the CLI or EventBridge console.


In [14]:
model_package_group_name = 'iris-classification-group'

# `list_model_packages` returns its results in pages; walk every page so no
# package is left behind. NOTE(review): deleting the group is expected to fail
# while it still contains packages — confirm against the API reference.
package_pages = sm_client.get_paginator('list_model_packages').paginate(
    ModelPackageGroupName=model_package_group_name
)
model_package_arns = [
    summary['ModelPackageArn']
    for page in package_pages
    for summary in page['ModelPackageSummaryList']
]

# Delete models
for model_package_arn in model_package_arns:
    sm_client.delete_model_package(ModelPackageName=model_package_arn)

# Delete model group
sm_client.delete_model_package_group(ModelPackageGroupName=model_package_group_name)

{'ResponseMetadata': {'RequestId': 'ac569426-360b-432d-8f0c-6ba801e11947',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ac569426-360b-432d-8f0c-6ba801e11947',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 21 Feb 2024 03:21:15 GMT'},
  'RetryAttempts': 0}}