# Basic model training

This notebook is split into two sections. The first section creates a basic training job that trains a single model using user-defined hyperparameters. The second section starts a hyperparameter tuning job that searches for the best hyperparameters within user-defined ranges.

Import the required packages and retrieve the execution role used to handle permissions

In [None]:
import os
import sagemaker
from sagemaker import get_execution_role

# SageMaker session object; the region below is read from its boto session.
sagemaker_session = sagemaker.Session()

# Region name reported by the session's underlying boto session.
region = sagemaker_session.boto_session.region_name

# Execution role used to handle permissions; passed to the estimator later on.
role = get_execution_role()

# Last expression in the cell, so the region is shown as the cell output.
region

Specify the path to the training and validation data stored in S3

In [None]:
# S3 locations of the input datasets. Both values are placeholders and must be
# replaced with real S3 paths before the training or tuning cells are run.
training_data_uri = 's3://<path to training data>'
validation_data_uri = 's3://<path to validation data>'

# Maps each input name ('train' / 'validation') to its S3 URI; this dict is what
# the `fit` calls further down the notebook receive.
fit_input = {'train': training_data_uri, 'validation': validation_data_uri}

Specify the hyperparameters and define the SageMaker TensorFlow estimator

In [3]:
from sagemaker.tensorflow import TensorFlow

# Fixed hyperparameter values for the single training job; they are handed to
# the estimator below via its `hyperparameters` argument.
hyperparameters = {
    'batch-size': 32,
    'fine-tune-learning-rate': 1e-4,
    'fine-tune-epochs': 1,
    'fine-tune-layer': 100,
}

# Each metric is a (name, regex) pair; the regex's capture group is the value.
# The leading space in the training pattern means it cannot match the
# "val_accuracy: " text, where the preceding character is an underscore.
metric_definitions = [
    {'Name': metric_name, 'Regex': metric_regex}
    for metric_name, metric_regex in (
        ('validation accuracy', 'val_accuracy: ([0-9\\.]+)'),
        ('training accuracy', ' accuracy: ([0-9\\.]+)'),
    )
]

# `entry_point` selects the custom training module to run, e.g.
# Xception_sagemaker_training.py or mobilenet_v3_sagemaker_training.py.
tf_estimator = TensorFlow(
    entry_point='Xception_sagemaker_training.py',
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    role=role,
    framework_version='2.4.1',
    py_version='py37',
    instance_type='ml.p3.2xlarge',
    instance_count=1,
)

Train the model

In [None]:
# Start the training job, using the train/validation inputs defined earlier.
tf_estimator.fit(inputs=fit_input)

Uncomment the line below to deploy the trained model

In [None]:
# tf_estimator.deploy(initial_instance_count=1, instance_type="ml.t2.large")

## Hyperparameter Tuning

In [4]:
from sagemaker.tuner import ContinuousParameter, CategoricalParameter, IntegerParameter, HyperparameterTuner

# Search space for the tuning job. The keys are the same hyperparameter names
# used in the fixed `hyperparameters` dict of the single training job.
hyperparamter_range = {
    # one of a fixed set of candidate batch sizes
    'batch-size': CategoricalParameter(values=[16, 32, 64, 128]),
    # any float between the two bounds
    'fine-tune-learning-rate': ContinuousParameter(min_value=1e-5, max_value=1e-3),
    # whole numbers between the two bounds
    'fine-tune-epochs': IntegerParameter(min_value=3, max_value=10),
    'fine-tune-layer': IntegerParameter(min_value=0, max_value=100),
}

# More parameters can be tuned by adding the matching arguments to the
# entry_point script, e.g. Xception_sagemaker_training.py

In [5]:
# Name of the metric the tuning job is asked to optimise.
objective_metric_name = 'validation accuracy'

# Metric definitions for the tuner. The single entry reuses the objective name
# so the two cannot drift apart; the regex capture group holds the value.
hyp_metric_definitions = [
    {
        'Name': objective_metric_name,
        'Regex': 'val_accuracy: ([0-9\\.]+)',
    },
]

Create the hyperparameter tuning job

In [None]:
# Build the tuner around the estimator defined earlier: it searches the ranges
# in `hyperparamter_range` and ranks jobs by `objective_metric_name`.
tuner = HyperparameterTuner(
    estimator=tf_estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparamter_range,
    metric_definitions=hyp_metric_definitions,
    max_jobs=20,           # total number of training jobs allowed
    max_parallel_jobs=2,   # number of jobs allowed to run at the same time
)

# Start the tuning job on the same train/validation inputs as the single job.
tuner.fit(inputs=fit_input)

Get information about the job that's just been run

In [None]:
# Last expression in the cell, so the tuning job's analytics object is shown as
# the cell output.
tuner.analytics()

<sagemaker.HyperparameterTuningJobAnalytics for tensorflow-training-210416-1052>

In [None]:
# Last expression in the cell, so the tuning job's description is shown as the
# cell output.
tuner.describe()

Retrieve the best model based on the specified objective metric

In [None]:
# Last expression in the cell, so the returned estimator is shown as the cell
# output. Per the note above this cell, "best" is judged by the objective metric.
tuner.best_estimator()