In [1]:
import sagemaker
from sagemaker import get_execution_role
 
# Session object reused by later cells (default bucket, region lookup).
session = sagemaker.Session()

# IAM execution role that is handed to the estimator further down.
role = get_execution_role()

In [2]:
# Key prefix under which this notebook keeps its artifacts.
prefix = 'pascalvoc'

# Default bucket of the current SageMaker session.
bucket = session.default_bucket()

# Location passed to the estimator as its output path.
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)
print(s3_output_location)

s3://sagemaker-eu-west-1-613904931467/pascalvoc/output


In [None]:
# Update these settings with your own file system, subnets and security group.

# Networking used by the training job (passed to the estimator).
security_group_ids = ['sg-09238e6d']
subnets = [
    'subnet-63715206',
    'subnet-cbf5bdbc',
    'subnet-59395b00',
]

# File system that holds the training and validation data.
file_system_id = 'fs-07914cf5a60649dc8'

# Prepended to every directory path given to FileSystemInput.
# NOTE(review): left empty here; presumably this should be the mount name of
# your file system (e.g. '/fsx') -- confirm before running.
mount_point_name = ''

In [33]:
from sagemaker.inputs import FileSystemInput


def _fsx_input(sub_path):
    """Return an FSxLustre input for `sub_path` below the configured mount point."""
    return FileSystemInput(
        file_system_id=file_system_id,
        file_system_type='FSxLustre',
        directory_path=mount_point_name + sub_path,
    )


# One input per channel; both live on the same file system.
fsx_train_data = _fsx_input('/pascalvoc/input/train')
fsx_validation_data = _fsx_input('/pascalvoc/input/validation')

# Channel name -> input, in the form later passed to od.fit().
data_channels = dict(train=fsx_train_data, validation=fsx_validation_data)

In [34]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Region of the current session; the algorithm image is looked up per region.
region = session.boto_region_name

# URI of the built-in object-detection training image.
container = get_image_uri(
    region_name=region,
    repo_name='object-detection',
    repo_version='latest',
)

In [35]:
role = get_execution_role()

# Estimator for the object-detection container.
# The training data is read from the FSx file system configured earlier, so
# the job is attached to the same subnets and security group.
od = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=1,
    train_instance_type='ml.p3.2xlarge',
    output_path=s3_output_location,
    subnets=subnets,
    # Was left blank (`security_group_ids=)`), which is a syntax error.
    security_group_ids=security_group_ids,
)

In [38]:
# Hyperparameters for the training job; unlisted ones keep the algorithm's
# defaults (see the "Reading default configuration" line in the training log).
od.set_hyperparameters(**{
    'base_network': 'resnet-50',
    'use_pretrained_model': 1,
    'num_classes': 20,
    'epochs': 30,
    'num_training_samples': 16551,
    'mini_batch_size': 90,
})

In [None]:
# Start training with the 'train' and 'validation' channels defined above.
od.fit(data_channels)

2020-05-28 08:05:46 Starting - Starting the training job...
2020-05-28 08:05:50 Starting - Launching requested ML instances......
2020-05-28 08:06:54 Starting - Preparing the instances for training......
2020-05-28 08:08:06 Downloading - Downloading input data
2020-05-28 08:08:06 Training - Downloading the training image...
2020-05-28 08:08:40 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34m[05/28/2020 08:08:43 INFO 139682424452928] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'label_width': u'350', u'early_stopping_min_epochs': u'10', u'epochs': u'30', u'overlap_threshold': u'0.5', u'lr_scheduler_factor': u'0.1', u'_num_kv_servers': u'auto', u'weight_decay': u'0.0005', u'mini_batch_size': u'32', u'use_pretrained_model': u'0', u'freeze_layer_pattern': u'', u'lr_scheduler_step': u'', u'early_stopping': u'False', u'early_stopping_patience': u'5'

[34m[05/28/2020 08:13:27 INFO 139682424452928] Epoch:    1, batches:    100, num_examples:   9000, 127.0 samples/sec, epoch time so far:  0:01:10.864227[0m
[34m[05/28/2020 08:14:24 INFO 139682424452928] #quality_metric: host=algo-1, epoch=1, batch=184 train cross_entropy <loss>=(1.6586140477514673)[0m
[34m[05/28/2020 08:14:24 INFO 139682424452928] #quality_metric: host=algo-1, epoch=1, batch=184 train smooth_l1 <loss>=(0.7290871329570964)[0m
[34m[05/28/2020 08:14:24 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 08:14:24 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 08:15:07 INFO 139682424452928] #quality_metric: host=algo-1, epoch=1, validation mAP <score>=(0.0016984744121054003)[0m
[34m[05/28/2020 08:15:07 INFO 139682424452928] Updating the best model with validation-mAP=0.0016984744121054003[0m
[34m[05/28/2020 08:15:07 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 08:15:07 INF

[34m[05/28/2020 08:24:48 INFO 139682424452928] Epoch:    5, batches:    100, num_examples:   9000, 123.8 samples/sec, epoch time so far:  0:01:12.671812[0m
[34m[05/28/2020 08:25:43 INFO 139682424452928] #quality_metric: host=algo-1, epoch=5, batch=184 train cross_entropy <loss>=(1.189043409977805)[0m
[34m[05/28/2020 08:25:43 INFO 139682424452928] #quality_metric: host=algo-1, epoch=5, batch=184 train smooth_l1 <loss>=(0.6308928188814054)[0m
[34m[05/28/2020 08:25:43 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 08:25:43 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 08:26:24 INFO 139682424452928] #quality_metric: host=algo-1, epoch=5, validation mAP <score>=(0.01733458015898123)[0m
[34m[05/28/2020 08:26:24 INFO 139682424452928] Updating the best model with validation-mAP=0.01733458015898123[0m
[34m[05/28/2020 08:26:24 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 08:26:24 INFO 139

[34m[05/28/2020 08:36:00 INFO 139682424452928] Epoch:    9, batches:    100, num_examples:   9000, 125.1 samples/sec, epoch time so far:  0:01:11.955943[0m
[34m[05/28/2020 08:36:55 INFO 139682424452928] #quality_metric: host=algo-1, epoch=9, batch=183 train cross_entropy <loss>=(1.1424976680839862)[0m
[34m[05/28/2020 08:36:55 INFO 139682424452928] #quality_metric: host=algo-1, epoch=9, batch=183 train smooth_l1 <loss>=(0.5744026766746475)[0m
[34m[05/28/2020 08:36:55 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 08:36:55 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 08:37:35 INFO 139682424452928] #quality_metric: host=algo-1, epoch=9, validation mAP <score>=(0.06390672875782509)[0m
[34m[05/28/2020 08:37:35 INFO 139682424452928] Updating the best model with validation-mAP=0.06390672875782509[0m
[34m[05/28/2020 08:37:35 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 08:37:35 INFO 13

[34m[05/28/2020 08:47:14 INFO 139682424452928] Epoch:    13, batches:    100, num_examples:   9000, 125.0 samples/sec, epoch time so far:  0:01:11.998442[0m
[34m[05/28/2020 08:48:10 INFO 139682424452928] #quality_metric: host=algo-1, epoch=13, batch=184 train cross_entropy <loss>=(1.1087657456803937)[0m
[34m[05/28/2020 08:48:10 INFO 139682424452928] #quality_metric: host=algo-1, epoch=13, batch=184 train smooth_l1 <loss>=(0.5383844458690645)[0m
[34m[05/28/2020 08:48:10 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 08:48:10 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 08:48:51 INFO 139682424452928] #quality_metric: host=algo-1, epoch=13, validation mAP <score>=(0.09900629708288655)[0m
[34m[05/28/2020 08:48:51 INFO 139682424452928] Updating the best model with validation-mAP=0.09900629708288655[0m
[34m[05/28/2020 08:48:51 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 08:48:51 INF

[34m[05/28/2020 08:58:27 INFO 139682424452928] Epoch:    17, batches:    100, num_examples:   9000, 123.7 samples/sec, epoch time so far:  0:01:12.758798[0m
[34m[05/28/2020 08:59:23 INFO 139682424452928] #quality_metric: host=algo-1, epoch=17, batch=184 train cross_entropy <loss>=(1.0779719505755907)[0m
[34m[05/28/2020 08:59:23 INFO 139682424452928] #quality_metric: host=algo-1, epoch=17, batch=184 train smooth_l1 <loss>=(0.5169386700871883)[0m
[34m[05/28/2020 08:59:23 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 08:59:23 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 09:00:04 INFO 139682424452928] #quality_metric: host=algo-1, epoch=17, validation mAP <score>=(0.1346542614723774)[0m
[34m[05/28/2020 09:00:04 INFO 139682424452928] Updating the best model with validation-mAP=0.1346542614723774[0m
[34m[05/28/2020 09:00:04 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 09:00:04 INFO 

[34m[05/28/2020 09:09:46 INFO 139682424452928] Epoch:    21, batches:    100, num_examples:   9000, 124.6 samples/sec, epoch time so far:  0:01:12.236759[0m
[34m[05/28/2020 09:10:41 INFO 139682424452928] #quality_metric: host=algo-1, epoch=21, batch=184 train cross_entropy <loss>=(1.044557544669978)[0m
[34m[05/28/2020 09:10:41 INFO 139682424452928] #quality_metric: host=algo-1, epoch=21, batch=184 train smooth_l1 <loss>=(0.492699546369667)[0m
[34m[05/28/2020 09:10:41 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 09:10:41 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 09:11:22 INFO 139682424452928] #quality_metric: host=algo-1, epoch=21, validation mAP <score>=(0.17305177009206796)[0m
[34m[05/28/2020 09:11:22 INFO 139682424452928] Updating the best model with validation-mAP=0.17305177009206796[0m
[34m[05/28/2020 09:11:22 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 09:11:22 INFO 

[34m[05/28/2020 09:21:00 INFO 139682424452928] Epoch:    25, batches:    100, num_examples:   9000, 124.2 samples/sec, epoch time so far:  0:01:12.435263[0m
[34m[05/28/2020 09:21:56 INFO 139682424452928] #quality_metric: host=algo-1, epoch=25, batch=184 train cross_entropy <loss>=(1.0171928646872888)[0m
[34m[05/28/2020 09:21:56 INFO 139682424452928] #quality_metric: host=algo-1, epoch=25, batch=184 train smooth_l1 <loss>=(0.4754054269087408)[0m
[34m[05/28/2020 09:21:56 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 09:21:56 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 09:22:36 INFO 139682424452928] #quality_metric: host=algo-1, epoch=25, validation mAP <score>=(0.21077233849487684)[0m
[34m[05/28/2020 09:22:36 INFO 139682424452928] Updating the best model with validation-mAP=0.21077233849487684[0m
[34m[05/28/2020 09:22:36 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 09:22:36 INF

[34m[05/28/2020 09:32:16 INFO 139682424452928] Epoch:    29, batches:    100, num_examples:   9000, 123.0 samples/sec, epoch time so far:  0:01:13.158981[0m
[34m[05/28/2020 09:33:11 INFO 139682424452928] #quality_metric: host=algo-1, epoch=29, batch=183 train cross_entropy <loss>=(0.9861586545292738)[0m
[34m[05/28/2020 09:33:11 INFO 139682424452928] #quality_metric: host=algo-1, epoch=29, batch=183 train smooth_l1 <loss>=(0.4600855151046955)[0m
[34m[05/28/2020 09:33:11 INFO 139682424452928] Round of batches complete[0m
[34m[05/28/2020 09:33:11 INFO 139682424452928] Updated the metrics[0m
[34m[05/28/2020 09:33:52 INFO 139682424452928] #quality_metric: host=algo-1, epoch=29, validation mAP <score>=(0.25304284450916964)[0m
[34m[05/28/2020 09:33:52 INFO 139682424452928] Updating the best model with validation-mAP=0.25304284450916964[0m
[34m[05/28/2020 09:33:53 INFO 139682424452928] Saved checkpoint to "/opt/ml/model/model_algo_1-0000.params"[0m
[34m[05/28/2020 09:33:53 INF

In [None]:
# Deploy the trained model to an endpoint; the last cell of the notebook
# deletes it again.
od_predictor = od.deploy(
    initial_instance_count=1,
    instance_type='ml.c5.2xlarge',
)

In [None]:
!wget -O test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/159.people/159_0090.jpg
with open(file_name, 'rb') as image:
    f = image.read()
    b = bytearray(f)

In [None]:
import json

# The payload in `b` is raw JPEG bytes.
od_predictor.content_type = 'image/jpeg'
results = od_predictor.predict(b)

# Parsed JSON response. The following cells read it as `detections`; it was
# previously stored under the name `response`, leaving `detections` undefined.
detections = json.loads(results)

The results are in a format that is similar to the .lst format with an addition of a confidence score for each detected object. The format of the output can be represented as `[class_index, confidence_score, xmin, ymin, xmax, ymax]`. Typically, we don't consider low-confidence predictions.

We have provided an additional script to easily visualize the detection outputs. You can visualize the high-confidence predictions with bounding boxes by filtering out low-confidence detections using the script below:

In [None]:
# Dump the raw detections for inspection before any confidence filtering.
print(detections)

In [None]:
def visualize_detection(img_file, dets, classes=(), thresh=0.6):
    """
    Draw the detections that reach a score threshold on top of an image.

    Parameters
    ----------
    img_file : str
        Path of the image file; it is loaded with ``matplotlib.image.imread``.
    dets : iterable of sequences
        One entry per detected object, laid out as
        ``[class_index, score, x0, y0, x1, y1]``. The corner coordinates are
        multiplied by the image width/height, i.e. they are treated as
        fractions of the image size.
    classes : tuple or list of str, optional
        Class names indexed by class id. When a class id has no entry, the
        numeric id is used as the label.
    thresh : float
        Detections with a score below this value are skipped.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    import random
    import matplotlib.pyplot as plt
    import matplotlib.image as mpimg

    img = mpimg.imread(img_file)
    plt.imshow(img)
    height, width = img.shape[0], img.shape[1]
    ax = plt.gca()

    # One random colour per class id, reused for every box of that class.
    colors = {}
    for klass, score, x0, y0, x1, y1 in dets:
        if score < thresh:
            continue
        cls_id = int(klass)
        if cls_id not in colors:
            colors[cls_id] = (random.random(), random.random(), random.random())

        # Scale the fractional corners to pixel coordinates.
        xmin = int(x0 * width)
        ymin = int(y0 * height)
        xmax = int(x1 * width)
        ymax = int(y1 * height)
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             fill=False,
                             edgecolor=colors[cls_id],
                             linewidth=3.5)
        ax.add_patch(rect)

        # Fall back to the numeric id when no name is available. The lower
        # bound keeps a negative id from indexing `classes` from the end.
        class_name = str(cls_id)
        if classes and 0 <= cls_id < len(classes):
            class_name = classes[cls_id]
        ax.text(xmin, ymin - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                fontsize=12, color='white')
    plt.show()

For the sake of this notebook, we trained the model for only a few (30) epochs. This implies that the results might not be optimal. To achieve better detection results, you can try to tune the hyperparameters and train the model for more epochs. In our tests, the mAP can reach 0.79 on the Pascal VOC dataset after training the algorithm with `learning_rate=0.0005`, `image_shape=512` and `mini_batch_size=16` for 240 epochs.

In [None]:
%matplotlib inline 

object_categories = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 
                     'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 
                     'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

# Setting a threshold 0.20 will only plot detection results that have a confidence score greater than 0.20.
threshold = 0.30

# Visualize the detections.
visualize_detection(file_name, detections['prediction'], object_categories, threshold)

In [None]:
# Clean up: delete the endpoint that was created by od.deploy().
od_predictor.delete_endpoint()