In [None]:
import os
import numpy as np
import tempfile

import tensorflow as tf

import sagemaker
import boto3
from sagemaker.estimator import Estimator

import subprocess


In [None]:
# AWS region of the default boto3 session; later cells use it to build the
# ECR registry host name and to log in to ECR.
region = boto3.Session().region_name

# One SageMaker session for the whole notebook, plus a low-level SageMaker
# client for the hyperparameter tuning API calls.
sagemaker_session = sagemaker.Session()
smclient = boto3.client('sagemaker')

# Reuse the session created above instead of constructing a second one just
# to look up the default bucket.
bucket = sagemaker_session.default_bucket()  # s3 bucket name, must be in the same region as the one specified above
prefix = 'sagemaker/DEMO-hpo-keras-cifar10'

# Execution role; passed to the Estimator and used as RoleArn in the
# training job definition.
role = sagemaker.get_execution_role()

NUM_CLASSES = 10   # the data set has 10 categories of images

In [None]:


def get_image_name(ecr_repository, tensorflow_version_tag):
    """Return the full image name ``<ecr_repository>:tensorflow-<tag>``.

    Args:
        ecr_repository: Repository part of the image name (everything before
            the ``:``).
        tensorflow_version_tag: TensorFlow version tag appended after the
            ``tensorflow-`` prefix.

    Returns:
        The formatted image name as a string.
    """
    name_parts = (ecr_repository, tensorflow_version_tag)
    return '%s:tensorflow-%s' % name_parts

def build_image(name, version):
    """Build the Docker image from ``./Dockerfile`` using the Docker CLI.

    Args:
        name: Image name (repository and tag) passed to ``docker build -t``.
        version: Value forwarded to the Dockerfile as the ``VERSION``
            build argument.

    Raises:
        subprocess.CalledProcessError: If ``docker build`` exits with a
            non-zero status.
    """
    # Hand the argument vector to subprocess directly. The previous version
    # formatted a command string and re-tokenised it with shlex.split, but
    # shlex was never imported (NameError); a list also keeps names that
    # contain whitespace or quotes intact.
    cmd = [
        'docker', 'build',
        '-t', str(name),
        '--build-arg', 'VERSION=%s' % version,
        '-f', 'Dockerfile',
        '.',
    ]
    subprocess.check_call(cmd)

# Version tags are listed at https://hub.docker.com/r/tensorflow/tensorflow/tags/
# e.g. the latest CPU version is 'latest', while the latest GPU version is 'latest-gpu'.
tensorflow_version_tag = '1.10.1'

# AWS account id of the current caller; it is the first label of the ECR registry host.
account = boto3.client('sts').get_caller_identity()['Account']

# The two China regions use a different DNS suffix than every other region.
domain = 'amazonaws.com.cn' if region in ('cn-north-1', 'cn-northwest-1') else 'amazonaws.com'

# Your ECR repository, which should have been created before running the notebook.
ecr_repository = "%s.dkr.ecr.%s.%s/test" % (account, region, domain)

image_name = get_image_name(ecr_repository, tensorflow_version_tag)

print('building image:'+image_name)
build_image(image_name, tensorflow_version_tag)

In [None]:
# Hyperparameters handed to the Estimator for the training run below.
hyperparameters = {
    'batch_size': 32,
    'data_augmentation': True,
    'learning_rate': .0001,
    'width_shift_range': .1,
    'height_shift_range': .1,
    'epochs': 1,
}
hyperparameters

In [None]:
%%time

output_location = "s3://{}/{}/output".format(bucket,prefix)

estimator = Estimator(image_name, role=role, output_path=output_location,
                      train_instance_count=1,
                      train_instance_type='local', hyperparameters=hyperparameters)
estimator.fit(channels)

In [None]:
# The name of our algorithm
algorithm_name = 'test'

# If the repository doesn't exist in ECR, create it.
exist_repo = !aws ecr describe-repositories --repository-names {algorithm_name} > /dev/null 2>&1

if not exist_repo:
    !aws ecr create-repository --repository-name {algorithm_name} > /dev/null

# Get the login command from ECR and execute it directly
!$(aws ecr get-login --region {region} --no-include-email)

!docker push {image_name}

In [None]:
import json
from time import gmtime, strftime

# Tuning job name: fixed prefix plus a UTC day-hour-minute-second timestamp.
timestamp = strftime("%d-%H-%M-%S", gmtime())
tuning_job_name = 'BYO-keras-tuningjob-' + timestamp

print(tuning_job_name)

# The only tuned hyperparameter: a continuous range for the learning rate.
# Bounds are given as strings.
learning_rate_range = {
    "Name": "learning_rate",
    "MinValue": "0.0001",
    "MaxValue": "0.001",
}

tuning_job_config = {
    "Strategy": "Bayesian",
    # Minimize the metric named "loss".
    "HyperParameterTuningJobObjective": {
        "Type": "Minimize",
        "MetricName": "loss",
    },
    # At most 9 training jobs in total, at most 3 running in parallel.
    "ResourceLimits": {
        "MaxNumberOfTrainingJobs": 9,
        "MaxParallelTrainingJobs": 3,
    },
    "ParameterRanges": {
        "CategoricalParameterRanges": [],
        "ContinuousParameterRanges": [learning_rate_range],
        "IntegerParameterRanges": [],
    },
}

In [None]:
# Image used for the training jobs: the one built earlier in the notebook.
training_image = image_name

print('training artifacts will be uploaded to: {}'.format(output_location))


def _s3_channel(channel_name, s3_uri):
    """Return one InputDataConfig entry for a fully replicated S3 prefix.

    Args:
        channel_name: Value for "ChannelName".
        s3_uri: Value for "S3Uri" of the channel's S3 data source.

    Returns:
        A dict with an uncompressed, unwrapped S3Prefix data source.
    """
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri,
                "S3DataDistributionType": "FullyReplicated"
            }
        },
        "CompressionType": "None",
        "RecordWrapperType": "None"
    }


training_job_definition = {
    "AlgorithmSpecification": {
        # Metric "loss" is extracted with this regex; its name matches the
        # objective metric name in tuning_job_config.
        "MetricDefinitions": [
            {
                "Name": "loss",
                "Regex": "loss: ([0-9\\.]+)"
            }
        ],
        "TrainingImage": training_image,
        "TrainingInputMode": "File"
    },
    # NOTE(review): `channels` is not defined in any cell visible here; it is
    # presumably a dict mapping 'train'/'test' to S3 URIs — confirm.
    "InputDataConfig": [
        _s3_channel("train", channels['train']),
        _s3_channel("test", channels['test']),
    ],
    "OutputDataConfig": {
        # Use the same variable that is printed above, so the reported and the
        # configured output locations cannot drift apart.
        "S3OutputPath": output_location
    },
    "ResourceConfig": {
        "InstanceCount": 1,
        "InstanceType": "ml.m4.xlarge",
        "VolumeSizeInGB": 50
    },
    "RoleArn": role,
    # Fixed hyperparameters, given as strings; learning_rate is absent because
    # it is the parameter ranged over in tuning_job_config.
    "StaticHyperParameters": {
        "batch_size": "32",
        "data_augmentation": "True",
        "height_shift_range": "0.1",
        "width_shift_range": "0.1",
        "epochs": "1"
    },
    "StoppingCondition": {
        "MaxRuntimeInSeconds": 43200
    }
}

In [None]:
# Fetch the description of the tuning job and show only its status field.
# NOTE(review): no cell visible here calls create_hyper_parameter_tuning_job with
# tuning_job_config / training_job_definition — confirm the job is created before this lookup.
tuning_job_description = smclient.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name)
tuning_job_description['HyperParameterTuningJobStatus']