In [1]:
import boto3
import sagemaker

# Create the SageMaker session object that the later cells share.
session = sagemaker.Session()
# Ask the session for its default S3 bucket; the dataset and output paths below are built from it.
bucket = session.default_bucket()

In [2]:
# Every S3 location used by this notebook lives under s3://<bucket>/<prefix>/.
prefix = 'dogscats'
s3_root = 's3://{}/{}'.format(bucket, prefix)

s3_train_path = s3_root + '/input/train/'
s3_val_path   = s3_root + '/input/validation/'
s3_output     = s3_root + '/output/'

# Show the three resolved locations, one per line.
print('\n'.join([s3_train_path, s3_val_path, s3_output]))

s3://sagemaker-us-east-1-613904931467/dogscats/input/train/
s3://sagemaker-us-east-1-613904931467/dogscats/input/validation/
s3://sagemaker-us-east-1-613904931467/dogscats/output/


### Get the name of the image classification algorithm in our region

In [3]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Take the region from the SageMaker session created in the first cell rather
# than from a separate boto3 session: the estimator is bound to `session`, and
# the Neo image lookup later in this notebook also uses session.boto_region_name,
# so every image URI is resolved against the same region.
region_name = session.boto_region_name

# Look up the registry path of the built-in image classification algorithm.
container = get_image_uri(region_name, "image-classification", "latest")
print(container)

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:latest


### Configure the training job

In [4]:
# Execution role reported by the SDK for this notebook environment.
role = sagemaker.get_execution_role()

# Training infrastructure: one ml.p3.2xlarge instance, artifacts written to s3_output,
# all API calls going through the session created in the first cell.
training_settings = {
    'train_instance_count': 1,
    'train_instance_type': 'ml.p3.2xlarge',
    'output_path': s3_output,
    'sagemaker_session': session,
}

ic = sagemaker.estimator.Estimator(container, role, **training_settings)



### Set algorithm parameters

In [5]:
# Numeric precision requested for training; set to 'float16' to try half precision instead.
precision_dtype = 'float32'

hyperparameters = {
    'num_layers': 50,               # Train a ResNet-50 model
    'use_pretrained_model': 0,      # Train from scratch
    'num_classes': 2,               # Dogs and cats
    'num_training_samples': 22500,  # Number of training samples
    'mini_batch_size': 128,
    'precision_dtype': precision_dtype,
    'epochs': 30,                   # Learn the training samples 30 times
}
ic.set_hyperparameters(**hyperparameters)

### Set dataset parameters

In [6]:
def _recordio_channel(s3_path):
    """Describe one input channel: RecordIO content under an S3 prefix, fully replicated."""
    return sagemaker.session.s3_input(s3_path,
                                      distribution='FullyReplicated',
                                      content_type='application/x-recordio',
                                      s3_data_type='S3Prefix')


train_data = _recordio_channel(s3_train_path)
validation_data = _recordio_channel(s3_val_path)

# Channel name -> data source, in the form passed to fit().
s3_channels = {'train': train_data, 'validation': validation_data}



### Train the model

In [None]:
# Launch the training job on the 'train' and 'validation' channels; the job's log is streamed below.
ic.fit(inputs=s3_channels)

2020-07-01 12:56:06 Starting - Starting the training job...
2020-07-01 12:56:08 Starting - Launching requested ML instances......
2020-07-01 12:57:26 Starting - Preparing the instances for training.........
2020-07-01 12:58:44 Downloading - Downloading input data...
2020-07-01 12:59:14 Training - Downloading the training image.[34mDocker entrypoint called with argument(s): train[0m
[34m[07/01/2020 12:59:42 INFO 140326804956992] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[34m[07/01/2020 12:59:42 INFO 140326804956992] Merging with provided configuration from /opt/ml/input/config/hyperp

[34m[07/01/2020 13:03:24 INFO 140326804956992] Epoch[3] Batch [40]#011Speed: 349.796 samples/sec#011accuracy=0.686547[0m
[34m[07/01/2020 13:03:31 INFO 140326804956992] Epoch[3] Batch [60]#011Speed: 352.066 samples/sec#011accuracy=0.687116[0m
[34m[07/01/2020 13:03:38 INFO 140326804956992] Epoch[3] Batch [80]#011Speed: 353.082 samples/sec#011accuracy=0.691551[0m
[34m[07/01/2020 13:03:45 INFO 140326804956992] Epoch[3] Batch [100]#011Speed: 353.754 samples/sec#011accuracy=0.696241[0m
[34m[07/01/2020 13:03:53 INFO 140326804956992] Epoch[3] Batch [120]#011Speed: 354.228 samples/sec#011accuracy=0.697508[0m
[34m[07/01/2020 13:04:00 INFO 140326804956992] Epoch[3] Batch [140]#011Speed: 354.567 samples/sec#011accuracy=0.698582[0m
[34m[07/01/2020 13:04:07 INFO 140326804956992] Epoch[3] Batch [160]#011Speed: 354.845 samples/sec#011accuracy=0.699680[0m
[34m[07/01/2020 13:04:12 INFO 140326804956992] Epoch[3] Train-accuracy=0.700759[0m
[34m[07/01/2020 13:04:12 INFO 140326804956992] Ep

[34m[07/01/2020 13:09:39 INFO 140326804956992] Epoch[8] Train-accuracy=0.831205[0m
[34m[07/01/2020 13:09:39 INFO 140326804956992] Epoch[8] Time cost=62.710[0m
[34m[07/01/2020 13:09:42 INFO 140326804956992] Epoch[8] Validation-accuracy=0.786595[0m
[34m[07/01/2020 13:09:50 INFO 140326804956992] Epoch[9] Batch [20]#011Speed: 342.783 samples/sec#011accuracy=0.843750[0m
[34m[07/01/2020 13:09:57 INFO 140326804956992] Epoch[9] Batch [40]#011Speed: 349.514 samples/sec#011accuracy=0.849085[0m
[34m[07/01/2020 13:10:04 INFO 140326804956992] Epoch[9] Batch [60]#011Speed: 351.835 samples/sec#011accuracy=0.850410[0m
[34m[07/01/2020 13:10:11 INFO 140326804956992] Epoch[9] Batch [80]#011Speed: 352.951 samples/sec#011accuracy=0.852816[0m
[34m[07/01/2020 13:10:18 INFO 140326804956992] Epoch[9] Batch [100]#011Speed: 353.674 samples/sec#011accuracy=0.853187[0m
[34m[07/01/2020 13:10:25 INFO 140326804956992] Epoch[9] Batch [120]#011Speed: 354.193 samples/sec#011accuracy=0.851821[0m
[34m[0

### Deploy the model

In [None]:
import time

# The endpoint name carries a UTC timestamp.
ic_endpoint_name = 'ic-{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()))

# NOTE(review): wait=False is passed, so this call presumably returns before the
# endpoint is in service — confirm it is up before running the prediction cells.
deploy_settings = dict(
    initial_instance_count=1,
    instance_type='ml.c5.4xlarge',
    endpoint_name=ic_endpoint_name,
    wait=False,
)
ic_predictor = ic.deploy(**deploy_settings)

## Compile and deploy the model with Neo

In [None]:
# Compiled artifacts go to their own prefix, next to (not inside) the training output.
output_path = 's3://{}/{}/output-neo/'.format(bucket, prefix)

# Compile the trained model with Neo for the ml_c5 instance family,
# declaring a single 3x224x224 input named 'data'.
neo_compile_args = dict(
    target_instance_family='ml_c5',
    input_shape={'data': [1, 3, 224, 224]},
    role=role,
    framework='mxnet',
    framework_version='1.5.1',
    output_path=output_path,
)
ic_neo_model = ic.compile_model(**neo_compile_args)

In [None]:
# Name the Neo endpoint with a UTC timestamp and show it.
ic_neo_endpoint_name = 'ic-neo-{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()))
print(ic_neo_endpoint_name)

# Serve the compiled model with the 'image-classification-neo' container image.
neo_image = get_image_uri(session.boto_region_name, 'image-classification-neo', repo_version='latest')
ic_neo_model.image = neo_image

ic_neo_predictor = ic_neo_model.deploy(
    endpoint_name=ic_neo_endpoint_name,
    initial_instance_count=1,
    instance_type='ml.c5.4xlarge',
)

### Download a test image

In [None]:
# Dog
!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/056.dog/056_0010.jpg
file_name = '/tmp/test.jpg'
from IPython.display import Image
Image(file_name)

### Predict test image

In [None]:
# Read the whole test image into memory as a byte buffer used as the request body.
with open(file_name, 'rb') as image_file:
    payload = bytearray(image_file.read())

def predict_images(predictor, iterations=1000, data=None):
    """Measure the average latency of `predictor.predict` over repeated calls.

    Args:
        predictor: object exposing `predict(data)`. Its `content_type`
            attribute is set to 'application/x-image' before the first call.
        iterations: number of prediction requests to send; must be >= 1.
        data: request body to send. When omitted, the module-level `payload`
            (the test image bytes) is used, as before.

    Returns:
        Mean time of one `predict` call, in seconds.

    Raises:
        ValueError: if `iterations` is smaller than 1 (previously this was a
            ZeroDivisionError for 0 and a meaningless -0.0 for negatives).
    """
    if iterations < 1:
        raise ValueError('iterations must be >= 1, got {}'.format(iterations))
    if data is None:
        data = payload

    predictor.content_type = 'application/x-image'
    total = 0.0
    for _ in range(iterations):
        # perf_counter is monotonic, so a system clock adjustment cannot skew a sample.
        start = time.perf_counter()
        predictor.predict(data)
        total += time.perf_counter() - start
    return total / iterations

In [None]:
%%time
# Average per-request latency, in seconds, measured against ic_predictor
# (the endpoint deployed directly from the estimator).
predict_images(ic_predictor)

In [None]:
%%time
# Same measurement against ic_neo_predictor (the endpoint deployed from the Neo-compiled model).
predict_images(ic_neo_predictor)

In [None]:
%%sh -s $output_path
# $1 is the notebook's output_path (the S3 prefix given to compile_model).
# Stop at the first failing command so tar never runs on a missing or stale archive.
set -e
echo "$1"
aws s3 ls "$1"
# Fetch the archive from under that prefix into the working directory, then unpack it.
aws s3 cp "${1}model-ml_c5.tar.gz" .
tar xvfz model-ml_c5.tar.gz

### Delete endpoints

In [None]:
# Delete the endpoint behind ic_predictor (the one deployed directly from the estimator).
ic_predictor.delete_endpoint()

In [None]:
# Delete the endpoint behind ic_neo_predictor (the one deployed from the Neo-compiled model).
ic_neo_predictor.delete_endpoint()

In [None]:
# Same S3 prefix as the ml_c5 compilation earlier in this notebook.
output_path = 's3://{}/{}/output-neo/'.format(bucket, prefix)

# Compile the trained model once more, this time for the 'rasp3b' target.
# Note that this rebinds ic_neo_model to the new compilation result.
rasp3b_compile_args = dict(
    target_instance_family='rasp3b',
    input_shape={'data': [1, 3, 224, 224]},
    role=role,
    framework='mxnet',
    framework_version='1.5.1',
    output_path=output_path,
)
ic_neo_model = ic.compile_model(**rasp3b_compile_args)