In [1]:
import boto3
import sagemaker

# Show which SageMaker Python SDK version this notebook runs against.
print(sagemaker.__version__)

# Create a SageMaker session and look up its default S3 bucket; the bucket
# name is used below to build the dataset and output locations.
session = sagemaker.Session()
bucket = session.default_bucket()

2.0.0rc1


In [2]:
# Key prefix under which every S3 object of this example lives.
prefix = 'dogscats'

# URI templates for the training channel, the validation channel and the
# training output; each one is filled with (bucket, prefix).
s3_templates = (
    's3://{}/{}/input/train/',
    's3://{}/{}/input/validation/',
    's3://{}/{}/output/',
)
s3_train_path, s3_val_path, s3_output = [
    template.format(bucket, prefix) for template in s3_templates
]

# Echo the resolved locations so they can be checked before training.
for s3_uri in (s3_train_path, s3_val_path, s3_output):
    print(s3_uri)

s3://sagemaker-us-east-1-613904931467/dogscats/input/train/
s3://sagemaker-us-east-1-613904931467/dogscats/input/validation/
s3://sagemaker-us-east-1-613904931467/dogscats/output/


### Get the name of the image classification algorithm in our region

In [3]:
from sagemaker import image_uris

# Take the region from the default boto3 session, then look up the container
# image URI of the image classification algorithm for that region.
region = boto3.Session().region_name
container = image_uris.retrieve(framework='image-classification', region=region)
print(container)

811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1


### Configure the training job

In [4]:
# Execution role handed to the estimator (and reused for Neo compilation
# further down in the notebook).
role = sagemaker.get_execution_role()

# Infrastructure and output location for the training job.
estimator_settings = {
    'instance_count': 1,
    'instance_type': 'ml.p3.2xlarge',
    'output_path': s3_output,
}

ic = sagemaker.estimator.Estimator(container, role, **estimator_settings)

### Set algorithm parameters

In [5]:
# Numeric precision used for training. 'float16' is the alternative value the
# notebook author experimented with.
precision_dtype = 'float32'

hyperparameters = {
    'num_layers': 50,                    # Train a Resnet-50 model
    'use_pretrained_model': 0,           # Train from scratch
    'num_classes': 2,                    # Dogs and cats
    'num_training_samples': 22500,       # Number of training samples
    'mini_batch_size': 128,
    'precision_dtype': precision_dtype,
    'epochs': 30,                        # Learn the training samples 30 times
}

ic.set_hyperparameters(**hyperparameters)

### Set dataset parameters

In [6]:
# Both channels are configured identically apart from their S3 location, so
# the shared settings are declared once.
channel_settings = {
    'distribution': 'FullyReplicated',
    'content_type': 'application/x-recordio',
    's3_data_type': 'S3Prefix',
}

train_data = sagemaker.TrainingInput(s3_train_path, **channel_settings)
validation_data = sagemaker.TrainingInput(s3_val_path, **channel_settings)

# Channel name -> input definition, as passed to fit().
s3_channels = {
    'train': train_data,
    'validation': validation_data,
}

### Train the model

In [None]:
# Launch the training job on the 'train' and 'validation' channels; the
# training log is streamed into this cell's output.
ic.fit(inputs=s3_channels)

2020-07-24 17:41:57 Starting - Starting the training job...
2020-07-24 17:42:00 Starting - Launching requested ML instances.........
2020-07-24 17:43:49 Starting - Preparing the instances for training......
2020-07-24 17:44:38 Downloading - Downloading input data...
2020-07-24 17:45:16 Training - Downloading the training image...
2020-07-24 17:45:50 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34m[07/24/2020 17:45:53 INFO 139697773139776] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[34m[07/24/2020 17:45:53

[34m[07/24/2020 17:49:31 INFO 139697773139776] Epoch[3] Batch [20]#011Speed: 340.019 samples/sec#011accuracy=0.674479[0m
[34m[07/24/2020 17:49:38 INFO 139697773139776] Epoch[3] Batch [40]#011Speed: 346.652 samples/sec#011accuracy=0.679688[0m
[34m[07/24/2020 17:49:45 INFO 139697773139776] Epoch[3] Batch [60]#011Speed: 349.084 samples/sec#011accuracy=0.681481[0m
[34m[07/24/2020 17:49:53 INFO 139697773139776] Epoch[3] Batch [80]#011Speed: 350.420 samples/sec#011accuracy=0.685089[0m
[34m[07/24/2020 17:50:00 INFO 139697773139776] Epoch[3] Batch [100]#011Speed: 351.157 samples/sec#011accuracy=0.687500[0m
[34m[07/24/2020 17:50:07 INFO 139697773139776] Epoch[3] Batch [120]#011Speed: 351.636 samples/sec#011accuracy=0.688985[0m
[34m[07/24/2020 17:50:14 INFO 139697773139776] Epoch[3] Batch [140]#011Speed: 351.973 samples/sec#011accuracy=0.690270[0m
[34m[07/24/2020 17:50:22 INFO 139697773139776] Epoch[3] Batch [160]#011Speed: 352.341 samples/sec#011accuracy=0.692741[0m
[34m[07/24/

[34m[07/24/2020 17:56:07 INFO 139697773139776] Epoch[9] Batch [20]#011Speed: 340.113 samples/sec#011accuracy=0.853423[0m
[34m[07/24/2020 17:56:15 INFO 139697773139776] Epoch[9] Batch [40]#011Speed: 346.751 samples/sec#011accuracy=0.852706[0m
[34m[07/24/2020 17:56:22 INFO 139697773139776] Epoch[9] Batch [60]#011Speed: 349.279 samples/sec#011accuracy=0.849001[0m
[34m[07/24/2020 17:56:29 INFO 139697773139776] Epoch[9] Batch [80]#011Speed: 350.392 samples/sec#011accuracy=0.850212[0m
[34m[07/24/2020 17:56:36 INFO 139697773139776] Epoch[9] Batch [100]#011Speed: 350.957 samples/sec#011accuracy=0.849861[0m
[34m[07/24/2020 17:56:44 INFO 139697773139776] Epoch[9] Batch [120]#011Speed: 351.436 samples/sec#011accuracy=0.853887[0m
[34m[07/24/2020 17:56:51 INFO 139697773139776] Epoch[9] Batch [140]#011Speed: 351.782 samples/sec#011accuracy=0.852615[0m
[34m[07/24/2020 17:56:58 INFO 139697773139776] Epoch[9] Batch [160]#011Speed: 352.070 samples/sec#011accuracy=0.850349[0m
[34m[07/24/

[34m[07/24/2020 18:02:58 INFO 139697773139776] Epoch[15] Batch [60]#011Speed: 351.237 samples/sec#011accuracy=0.926998[0m
[34m[07/24/2020 18:03:05 INFO 139697773139776] Epoch[15] Batch [80]#011Speed: 352.025 samples/sec#011accuracy=0.928241[0m
[34m[07/24/2020 18:03:12 INFO 139697773139776] Epoch[15] Batch [100]#011Speed: 352.351 samples/sec#011accuracy=0.931002[0m
[34m[07/24/2020 18:03:19 INFO 139697773139776] Epoch[15] Batch [120]#011Speed: 352.641 samples/sec#011accuracy=0.929817[0m
[34m[07/24/2020 18:03:27 INFO 139697773139776] Epoch[15] Batch [140]#011Speed: 352.811 samples/sec#011accuracy=0.931184[0m
[34m[07/24/2020 18:03:34 INFO 139697773139776] Epoch[15] Batch [160]#011Speed: 352.954 samples/sec#011accuracy=0.931240[0m
[34m[07/24/2020 18:03:39 INFO 139697773139776] Epoch[15] Train-accuracy=0.931830[0m
[34m[07/24/2020 18:03:39 INFO 139697773139776] Epoch[15] Time cost=63.065[0m
[34m[07/24/2020 18:03:42 INFO 139697773139776] Epoch[15] Validation-accuracy=0.815625

[34m[07/24/2020 18:10:10 INFO 139697773139776] Epoch[21] Batch [160]#011Speed: 352.321 samples/sec#011accuracy=0.970740[0m
[34m[07/24/2020 18:10:15 INFO 139697773139776] Epoch[21] Train-accuracy=0.971295[0m
[34m[07/24/2020 18:10:15 INFO 139697773139776] Epoch[21] Time cost=63.165[0m
[34m[07/24/2020 18:10:18 INFO 139697773139776] Epoch[21] Validation-accuracy=0.837171[0m
[34m[07/24/2020 18:10:18 INFO 139697773139776] Storing the best model with validation accuracy: 0.837171[0m
[34m[07/24/2020 18:10:18 INFO 139697773139776] Saved checkpoint to "/opt/ml/model/image-classification-0022.params"[0m
[34m[07/24/2020 18:10:25 INFO 139697773139776] Epoch[22] Batch [20]#011Speed: 339.767 samples/sec#011accuracy=0.974330[0m
[34m[07/24/2020 18:10:33 INFO 139697773139776] Epoch[22] Batch [40]#011Speed: 346.807 samples/sec#011accuracy=0.974085[0m
[34m[07/24/2020 18:10:40 INFO 139697773139776] Epoch[22] Batch [60]#011Speed: 349.353 samples/sec#011accuracy=0.972848[0m
[34m[07/24/2020

### Deploy the model

In [None]:
import time

# A UTC timestamp suffix gives every run its own endpoint name.
deploy_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
ic_endpoint_name = 'ic-' + deploy_timestamp

# NOTE(review): wait=False presumably returns before the endpoint is in
# service, letting the Neo compilation below run in the meantime -- confirm.
deploy_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.c5.4xlarge',
    'endpoint_name': ic_endpoint_name,
    'wait': False,
}
ic_predictor = ic.deploy(**deploy_settings)

## Compile and deploy the model with Neo

In [10]:
# S3 prefix that receives the Neo-compiled artifact.
output_path = 's3://{}/{}/output-neo/'.format(bucket, prefix)

# Compilation target and model description for the ml_c5 instance family.
neo_compile_settings = {
    'target_instance_family': 'ml_c5',
    'input_shape': {'data': [1, 3, 224, 224]},
    'role': role,
    'framework': 'mxnet',
    'framework_version': '1.5.1',
    'output_path': output_path,
}
ic_neo_model = ic.compile_model(**neo_compile_settings)

?.............!

'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


In [None]:
# Timestamped (UTC) name for the endpoint that serves the compiled model.
neo_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
ic_neo_endpoint_name = 'ic-neo-' + neo_timestamp
print(ic_neo_endpoint_name)

# Point the compiled model at the Neo flavour of the image classification
# container before deploying it.
neo_image_uri = image_uris.retrieve('image-classification-neo', region)
ic_neo_model.image_uri = neo_image_uri
print(ic_neo_model.image_uri)

ic_neo_predictor = ic_neo_model.deploy(
    initial_instance_count=1,
    instance_type='ml.c5.4xlarge',
    endpoint_name=ic_neo_endpoint_name,
)

ic-neo-2020-07-24-20-05-38
785573368785.dkr.ecr.us-east-1.amazonaws.com/image-classification-neo:latest
------------

### Download a test image

In [None]:
!wget -O /tmp/test.jpg https://upload.wikimedia.org/wikipedia/commons/b/b7/LabradorWeaving.jpg
file_name = '/tmp/test.jpg'
from IPython.display import Image
Image(file_name)

### Predict test image

In [None]:
# Load test image from file
# Read the test image as raw bytes; this is the request body sent to the
# endpoints by predict_images().
with open(file_name, 'rb') as image_file:
    payload = bytearray(image_file.read())

def predict_images(predictor, iterations=1000, data=None):
    """Measure the average latency of ``predictor.predict``.

    Args:
        predictor: Object exposing ``predict(data)`` and accepting assignment
            to ``content_type`` (the predictors returned by ``deploy`` in
            this notebook).
        iterations: Number of prediction requests to time. Must be >= 1.
        data: Request body to send. When omitted, the notebook-level
            ``payload`` (bytes of the test image) is used.

    Returns:
        float: Mean elapsed time per request, in seconds.

    Raises:
        ValueError: If ``iterations`` is less than 1. Previously 0 failed
            with ZeroDivisionError and negative values returned a
            meaningless result.
    """
    if iterations < 1:
        raise ValueError('iterations must be >= 1, got {}'.format(iterations))
    if data is None:
        # Fall back to the image loaded earlier in the notebook.
        data = payload

    # NOTE(review): released SageMaker SDK v2 versions may expose
    # Predictor.content_type as a read-only property derived from the
    # serializer -- confirm this assignment works with the installed SDK.
    predictor.content_type = 'application/x-image'

    total = 0.0
    for _ in range(iterations):
        # perf_counter() is monotonic and high resolution, so the measurement
        # is not disturbed by system clock adjustments.
        tick = time.perf_counter()
        predictor.predict(data)
        total += time.perf_counter() - tick
    return total / iterations

In [None]:
%%time
# Average per-request latency of the endpoint deployed directly from the
# trained model (no Neo compilation).
predict_images(ic_predictor)

In [None]:
%%time
# Same measurement against the endpoint serving the Neo-compiled model.
predict_images(ic_neo_predictor)

In [None]:
%%sh -s $output_path
# $1 is the Neo output S3 prefix passed on the magic line above. It is quoted
# everywhere so the URI can never be word-split or glob-expanded by the shell.
echo "$1"
aws s3 ls "$1"
# Fetch the compiled artifact for the ml_c5 target and list/unpack its content.
aws s3 cp "${1}model-ml_c5.tar.gz" .
tar xvfz model-ml_c5.tar.gz

### Delete endpoints

In [None]:
# Tear down the endpoint created by ic.deploy().
ic_predictor.delete_endpoint()

In [None]:
# Tear down the endpoint created by ic_neo_model.deploy().
ic_neo_predictor.delete_endpoint()

In [None]:
# Same Neo output prefix as for the ml_c5 compilation.
output_path = 's3://{}/{}/output-neo/'.format(bucket, prefix)

# Compile the trained model again, this time for the 'rasp3b' target.
# Note that this rebinds ic_neo_model.
rasp3b_compile_settings = {
    'target_instance_family': 'rasp3b',
    'input_shape': {'data': [1, 3, 224, 224]},
    'role': role,
    'framework': 'mxnet',
    'framework_version': '1.5.1',
    'output_path': output_path,
}
ic_neo_model = ic.compile_model(**rasp3b_compile_settings)