In [2]:
import boto3
import numpy as np
import os
import pathlib
import random
import tensorflow as tf
import time

from keras.preprocessing.image import img_to_array, load_img
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tensorflow import TensorFlow
from sklearn.model_selection import train_test_split
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

Using TensorFlow backend.


# Converting images to the TFRecord format

## Get all image paths and shuffle them

In [2]:
# Root folder of the image set: an `images` directory under the current working directory.
data_root = pathlib.Path.cwd() / 'images'

In [3]:
# Collect every file that sits one level below `data_root` (one sub-directory per class).
# glob() yields entries in a filesystem-dependent order, so sort first and then shuffle
# with a fixed seed; otherwise the random_state given to train_test_split further down
# cannot make the train/test split reproducible across kernel restarts.
# Non-file entries (e.g. nested directories) are skipped because they cannot be loaded as images.
all_image_paths = sorted(str(path) for path in data_root.glob('*/*') if path.is_file())
random.Random(33).shuffle(all_image_paths)  # private RNG: leaves the global `random` state untouched
image_count = len(all_image_paths)

In [4]:
# Sanity check: show the first ten shuffled paths (the parent directory name is the class).
all_image_paths[:10]

['/home/ec2-user/SageMaker/images/0/8864_idx5_x901_y1651_class0.png',
 '/home/ec2-user/SageMaker/images/1/12934_idx5_x1901_y701_class1.png',
 '/home/ec2-user/SageMaker/images/0/14157_idx5_x2651_y551_class0.png',
 '/home/ec2-user/SageMaker/images/0/8867_idx5_x1601_y401_class0.png',
 '/home/ec2-user/SageMaker/images/0/10305_idx5_x151_y1051_class0.png',
 '/home/ec2-user/SageMaker/images/0/9126_idx5_x1501_y801_class0.png',
 '/home/ec2-user/SageMaker/images/0/12751_idx5_x2001_y751_class0.png',
 '/home/ec2-user/SageMaker/images/0/14304_idx5_x1851_y1901_class0.png',
 '/home/ec2-user/SageMaker/images/0/12819_idx5_x2201_y2151_class0.png',
 '/home/ec2-user/SageMaker/images/1/14157_idx5_x2401_y1401_class1.png']

## Get all image labels

In [5]:
# The label of an image is the integer name of the directory that contains it.
all_image_labels = [
    int(pathlib.PurePath(image_path).parent.name)
    for image_path in all_image_paths
]

In [6]:
# Sanity check: labels for the same ten images previewed above, in the same order.
all_image_labels[:10]

[0, 1, 0, 0, 0, 0, 0, 0, 0, 1]

## Load all images into a NumPy array (row order matches the label list)

In [7]:
# Geometry every image is resized to before being stored in the dataset array.
image_width = 50
image_height = 50
# Number of colour channels kept per pixel.
channels = 3

In [8]:
# Pre-allocate one (height, width, channels) uint8 slot per image; the contents are
# uninitialised until the loop below fills them.
dataset = np.empty((image_count, image_height, image_width, channels), dtype=np.uint8)

for loaded, image_path in enumerate(all_image_paths, start=1):
    # load_img returns a PIL image; resize() expects (width, height).
    pil_image = load_img(image_path).resize((image_width, image_height))
    # Values are cast to the array's uint8 dtype on assignment.
    dataset[loaded - 1] = img_to_array(pil_image, 'channels_last')
    if loaded % 250 == 0:
        print("%d images to array" % loaded)
print("All images to array!")

250 images to array
500 images to array
750 images to array
1000 images to array
1250 images to array
1500 images to array
1750 images to array
2000 images to array
2250 images to array
2500 images to array
2750 images to array
3000 images to array
3250 images to array
3500 images to array
3750 images to array
4000 images to array
4250 images to array
4500 images to array
4750 images to array
5000 images to array
5250 images to array
5500 images to array
5750 images to array
6000 images to array
6250 images to array
6500 images to array
6750 images to array
7000 images to array
7250 images to array
7500 images to array
7750 images to array
8000 images to array
8250 images to array
8500 images to array
8750 images to array
9000 images to array
9250 images to array
9500 images to array
9750 images to array
10000 images to array
10250 images to array
10500 images to array
10750 images to array
11000 images to array
11250 images to array
11500 images to array
11750 images to array
12000 im

## Split dataset into train and test

In [9]:
# Hold out 30% of the examples for testing; random_state pins the split itself.
X_train, X_test, y_train, y_test = train_test_split(
    dataset,
    all_image_labels,
    test_size=0.3,
    random_state=33,
)

## Convert images and labels to TFRecord files

In [10]:
def convert_to_tfrecord(images, labels, num_examples, name, directory):
    """Serialize images and their labels into ``<directory>/<name>.tfrecords``.

    Every example is written as a ``tf.train.Example`` with the int64 features
    'height', 'width', 'depth' and 'label', plus the bytes feature 'image_raw'
    holding ``images[index].tobytes()``.

    Args:
        images: array indexed as (example, rows, cols, depth); must expose
            ``.shape`` and a per-example ``.tobytes()``.
        labels: sequence of integer labels, one per image.
        num_examples: expected number of examples; must equal both
            ``images.shape[0]`` and ``len(labels)``.
        name: output file name without the '.tfrecords' extension.
        directory: directory in which the file is created.

    Raises:
        ValueError: if ``images`` or ``labels`` does not hold exactly
            ``num_examples`` entries.
    """
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # Validate both inputs up front so a mismatch fails before any file is created.
    if images.shape[0] != num_examples:
        raise ValueError('Images size %d does not match label size %d.' % (images.shape[0], num_examples))
    if len(labels) != num_examples:
        raise ValueError('Labels size %d does not match number of examples %d.' % (len(labels), num_examples))
    rows = images.shape[1]
    cols = images.shape[2]
    depth = images.shape[3]

    filename = os.path.join(directory, name + '.tfrecords')
    print('Writing', filename)
    # The context manager closes (and flushes) the writer even if serialization fails midway.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for index in range(num_examples):
            image_raw = images[index].tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(rows),
                'width': _int64_feature(cols),
                'depth': _int64_feature(depth),
                # int() normalises NumPy integer scalars to a plain Python int.
                'label': _int64_feature(int(labels[index])),
                'image_raw': _bytes_feature(image_raw)}))
            writer.write(example.SerializeToString())

In [11]:
# Write the training split to images_train.tfrecords in the current working directory.
convert_to_tfrecord(
    images=X_train,
    labels=y_train,
    num_examples=len(y_train),
    name='images_train',
    directory=os.getcwd(),
)

Writing /home/ec2-user/SageMaker/images_train.tfrecords


In [12]:
# Write the held-out split to images_test.tfrecords in the current working directory.
convert_to_tfrecord(
    images=X_test,
    labels=y_test,
    num_examples=len(y_test),
    name='images_test',
    directory=os.getcwd(),
)

Writing /home/ec2-user/SageMaker/images_test.tfrecords


## Upload the train and test .tfrecords files to S3

In [13]:
%%bash
# Stop at the first failing command: the cell's exit status is that of the last command,
# so without this a failed train upload would be hidden by a successful test upload.
set -euo pipefail

# Each split goes to its own prefix; the training job later reads these prefixes as channels.
aws s3 cp images_train.tfrecords s3://sagemaker-data-jv/breast-cancer-detection/input/tfrecord/train/
aws s3 cp images_test.tfrecords s3://sagemaker-data-jv/breast-cancer-detection/input/tfrecord/test/

upload: ./images_train.tfrecords to s3://sagemaker-data-jv/breast-cancer-detection/input/tfrecord/train/images_train.tfrecords
upload: ./images_test.tfrecords to s3://sagemaker-data-jv/breast-cancer-detection/input/tfrecord/test/images_test.tfrecords


# Creating a model using TensorFlow

## Configure hyperparameters

In [15]:
# How many classes the model predicts.
num_classes = 2

# How many examples go into each training batch.
mini_batch_size = 128

# Upper bound on the number of training steps.
max_steps = 5000

# Step size used by the optimizer.
learning_rate = 0.01

## Create a unique job name 

In [16]:
# Job names must be unique, so append the current UTC time to a fixed prefix.
job_name_prefix = 'breast-cancer-detection'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = ''.join([job_name_prefix, timestamp])

## Specify the input paths for the job

In [17]:
# S3 locations of the TFRecord files uploaded earlier, one prefix per channel.
bucket = 'sagemaker-data-jv'
input_prefix = 'breast-cancer-detection/input/tfrecord'
input_train = 's3://%s/%s/train/' % (bucket, input_prefix)
input_test = 's3://%s/%s/test/' % (bucket, input_prefix)

## Specify the output path for the job

In [18]:
# S3 prefix, in the same bucket as the inputs, passed to the estimator as both
# output_path and model_dir.
output_prefix = 'breast-cancer-detection/output'
output_path = 's3://%s/%s/' % (bucket, output_prefix)

## Configure training instances

In [19]:
# Training hardware: instance type, number of instances, and storage volume size in GB.
instance_type = 'ml.p2.xlarge'
instance_count = 1
volume_size_gb = 50

## Get the execution role

In [20]:
# Execution role returned by the SageMaker SDK; it is handed to the estimator as `role`.
role = get_execution_role()

## Configure train timeout

In [21]:
# Passed to the estimator as train_max_run; 360000 seconds, i.e. 100 hours.
train_timeout = 100 * 60 * 60

## Specify the path to the training script

In [22]:
# Entry-point script for the training job; the relative path is resolved from the
# directory the notebook runs in.
training_script_path = 'tensorflowScript.py'

## Create a sagemaker.TensorFlow estimator

In [23]:
# Training-job definition: run `training_script_path` in the SageMaker TensorFlow 1.12.0 /
# Python 3 container on the hardware configured above.
estimator = TensorFlow(
    entry_point=training_script_path,
    role=role,
    framework_version='1.12.0',
    py_version='py3',
    train_instance_count=instance_count,
    train_instance_type=instance_type,
    train_volume_size=volume_size_gb,
    train_max_run=train_timeout,
    output_path=output_path,
    model_dir=output_path,
    # Hyperparameters passed to the training script under these hyphenated names.
    hyperparameters={
        'num-classes': num_classes,
        'mini-batch-size': mini_batch_size,
        'max-steps': max_steps,
        'learning-rate': learning_rate,
    },
    # Extract the training loss from log lines such as "loss = 0.5557".
    metric_definitions=[
        {'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'},
    ],
)

# Create a training job

In [18]:
# Start the training job; each key is a channel name mapped to its S3 prefix.
estimator.fit(
    inputs={'train': input_train, 'test': input_test},
    job_name=job_name,
)

2019-06-13 15:40:09 Starting - Starting the training job...
2019-06-13 15:40:10 Starting - Launching requested ML instances.........
2019-06-13 15:41:41 Starting - Preparing the instances for training......
2019-06-13 15:42:51 Downloading - Downloading input data......
2019-06-13 15:44:11 Training - Downloading the training image..
[31m2019-06-13 15:44:19,553 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[31m2019-06-13 15:44:20,276 sagemaker-containers INFO     Invoking user script
[0m
[31mTraining Env:
[0m
[31m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "test": "/opt/ml/input/data/test",
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "num-classes": 2,
        "learning-rate": 0.01,
        "max-steps": 5000,
        "mode


2019-06-13 15:44:15 Training - Training image download completed. Training in progress.[31mINFO:tensorflow:global_step/sec: 36.4447[0m
[31mINFO:tensorflow:loss = 0.55575454, step = 100 (2.744 sec)[0m
[31mINFO:tensorflow:global_step/sec: 36.6191[0m
[31mINFO:tensorflow:loss = 0.59828985, step = 200 (2.731 sec)[0m
[31mINFO:tensorflow:global_step/sec: 37.562[0m
[31mINFO:tensorflow:loss = 0.61382616, step = 300 (2.662 sec)[0m
[31mINFO:tensorflow:global_step/sec: 37.4797[0m
[31mINFO:tensorflow:loss = 0.5536806, step = 400 (2.668 sec)[0m
[31mINFO:tensorflow:global_step/sec: 37.4923[0m
[31mINFO:tensorflow:loss = 0.58829117, step = 500 (2.667 sec)[0m
[31mINFO:tensorflow:global_step/sec: 37.2925[0m
[31mINFO:tensorflow:loss = 0.54433995, step = 600 (2.681 sec)[0m
[31mINFO:tensorflow:global_step/sec: 37.4782[0m
[31mINFO:tensorflow:loss = 0.5853925, step = 700 (2.668 sec)[0m
[31mINFO:tensorflow:global_step/sec: 36.954[0m
[31mINFO:tensorflow:loss = 0.55538493, step = 


2019-06-13 15:49:31 Uploading - Uploading generated training model
2019-06-13 15:49:31 Completed - Training job completed
Billable seconds: 400


# Creating a tuning job

## Defining tuning configuration

In [24]:
# Search space: hyperparameter name -> range to explore.
# The key has to be spelled exactly like the hyperparameter the estimator passes to the
# training script ('learning-rate', hyphenated, as in the estimator's `hyperparameters`
# dict). With a different spelling the tuner adds a second, unrelated argument while the
# fixed 'learning-rate' value stays in place, so the search would not change training.
# NOTE(review): confirm tensorflowScript.py reads the value from --learning-rate.
hyperparameter_ranges = {
    'learning-rate': ContinuousParameter(0.001, 1.0)
}

# Objective: minimise the 'loss' metric captured by the metric regex.
objective_metric_name = 'loss'
objective_type = 'Minimize'

# Budget: total number of training jobs, and how many may run at the same time.
max_jobs = 2
max_parallel_jobs = 2

## Create a unique job name

In [25]:
# Unique name for the tuning job: fixed prefix plus the current UTC time.
# Note that this rebinds job_name_prefix / timestamp / job_name from the training section.
job_name_prefix = 'bcd-tuning'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = ''.join([job_name_prefix, timestamp])

## Creating a hyperparameter tuner

In [28]:
# Tuner that launches up to `max_jobs` training jobs based on `estimator`, exploring
# `hyperparameter_ranges` and ranking the jobs by the objective metric.
tuner = HyperparameterTuner(
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    # Same log pattern the estimator uses to extract the loss.
    metric_definitions=[
        {'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'},
    ],
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
)

## Launch the tuning job

In [29]:
# Start the tuning job on the same two channels used for the single training job.
tuner.fit(
    inputs={'train': input_train, 'test': input_test},
    job_name=job_name,
)
# Wait for the tuning job before moving on.
tuner.wait()