In [1]:
# this is an example of how we can resume training (or to perform incremental training)
# it follows the following example
# https://github.com/aws/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/object_detection_pascalvoc_coco/object_detection_incremental_training.ipynb


#%%time
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
print(role)
sess = sagemaker.Session()

arn:aws:iam::053061259712:role/service-role/AmazonSageMaker-ExecutionRole-20201027T104360


In [2]:
bucket_name = 'treetracker-training-images'
print(bucket_name)
prefix = 'imnet'

treetracker-training-images


In [3]:
print(prefix)

imnet


In [4]:
import sagemaker.image_uris as image_uris

training_image = image_uris.retrieve('object-detection', sess.boto_region_name)

print (training_image)
print(sess.boto_region_name)

813361260812.dkr.ecr.eu-central-1.amazonaws.com/object-detection:1
eu-central-1


In [9]:
%%time
! rm data/train.* -rf   
! mkdir data
! pwd
! ls data
! pip install mxnet

! python3 convert_imnet_to_recordio.py

mkdir: cannot create directory ‘data’: File exists
/home/ec2-user/SageMaker/treetracker-machine-learning/imnet/imnet_to_record_io
aug_test.idx  aug_test.rec   aug_train.lst  bounding_boxes  imnet.names
aug_test.lst  aug_train.idx  aug_train.rec  images.txt	    original_images
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m
These downloads are big and may take some time... please be patient :)
Seems like judas raw images have already been downloaded. If you wish to redownload, delete the original directory with the corresponding wnid n12513613
Seems like the n12513613  annotations have already been downloaded. If you wish to redownload, delete the original directory with the corresponding wnid. 
Raw image data and bounding boxes for judas (wnid n12513613)  finished in  4.220008850097656e-05 seconds
Seems like palm raw images have already been downloaded. If you wish to redownload, delete the original dire

In [10]:
%%time

key_train = "{}/train.rec".format(prefix)
s3_train_data = 's3://{}/{}'.format(bucket_name, key_train)
print('Done writing to {}'.format(s3_train_data))

key_validation = "{}/validation.rec".format(prefix)
s3_validation_data = 's3://{}/{}'.format(bucket_name, key_validation)


Done writing to s3://treetracker-training-images/imnet/train.rec
CPU times: user 102 µs, sys: 0 ns, total: 102 µs
Wall time: 71.3 µs


In [11]:
s3_output_location = 's3://{}/{}/output'.format(bucket_name, prefix)
print(s3_output_location)

s3://treetracker-training-images/imnet/output


In [12]:
new_od_model = sagemaker.estimator.Estimator(training_image,
                                             role, 
                                             instance_count=1, 
                                             instance_type='ml.p3.2xlarge',
                                             volume_size = 50,
                                             max_run = 360000,
                                             input_mode= 'File',
                                             output_path=s3_output_location,
                                             sagemaker_session=sess)

In [13]:
num_species = 2 # this should be the number of lines of imnet.names
my_training_samples = 2451

new_od_model.set_hyperparameters(base_network='resnet-50',
                                 use_pretrained_model=1,
                                 num_classes=num_species,
                                 mini_batch_size=32,
                                 epochs=1,
                                 learning_rate=0.001,
                                 lr_scheduler_step='3,6',
                                 lr_scheduler_factor=0.1,
                                 optimizer='sgd',
                                 momentum=0.9,
                                 weight_decay=0.0005,
                                 overlap_threshold=0.5,
                                 nms_threshold=0.45,
                                 image_shape=300,
                                 label_width=350,
                                 num_training_samples=my_training_samples)

In [14]:
train_data = sagemaker.session.TrainingInput(s3_train_data, distribution='FullyReplicated', 
                        content_type='application/x-recordio', s3_data_type='S3Prefix')

validation_data = sagemaker.session.TrainingInput(s3_validation_data, distribution='FullyReplicated', 
                              content_type='application/x-recordio', s3_data_type='S3Prefix')

s3_model_data = 's3://treetracker-training-images/imnet/output/tree-nontree/object-detection-2021-01-01-21-26-10-808/output/model.tar.gz'

model_data = sagemaker.session.TrainingInput(s3_model_data, distribution='FullyReplicated', 
                             content_type='application/x-sagemaker-model', s3_data_type='S3Prefix')

# In addition to two data channels, add a 'model' channel for the training.
data_channels_with_initial_model = {'train': train_data, 'validation': validation_data, 'model': model_data}

In [15]:
new_od_model.fit(inputs=data_channels_with_initial_model, logs=True)

2021-01-30 17:35:30 Starting - Starting the training job...
2021-01-30 17:35:54 Starting - Launching requested ML instancesProfilerReport-1612028130: InProgress
......
2021-01-30 17:36:54 Starting - Preparing the instances for training......
2021-01-30 17:37:55 Downloading - Downloading input data...
2021-01-30 17:38:15 Training - Downloading the training image..[34mDocker entrypoint called with argument(s): train[0m
[34m[01/30/2021 17:38:38 INFO 139721585260352] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'label_width': u'350', u'early_stopping_min_epochs': u'10', u'epochs': u'30', u'overlap_threshold': u'0.5', u'lr_scheduler_factor': u'0.1', u'_num_kv_servers': u'auto', u'weight_decay': u'0.0005', u'mini_batch_size': u'32', u'use_pretrained_model': u'0', u'freeze_layer_pattern': u'', u'lr_scheduler_step': u'', u'early_stopping': u'False', u'early_stopping_patience': u'5', u'momentum': u'0.9', u'num_training_samples': u


2021-01-30 17:39:57 Completed - Training job completed
ProfilerReport-1612028130: NoIssuesFound
Training seconds: 121
Billable seconds: 121


In [None]:
%%time
object_detector = new_od_model.deploy(initial_instance_count = 1,
                                 instance_type = 'ml.m4.xlarge')
object_detector.endpoint_name

--------------

In [17]:
import json
from image_utils import visualize_detection

def compute_detections(file_name):
    with open(file_name, 'rb') as image:
        f = image.read()
        b = bytearray(f)
        ne = open('n.txt','wb')
        ne.write(b)

    results = object_detector.predict(b, initial_args={'ContentType': 'image/jpeg'})
    detections = json.loads(results)
    # print (detections)

    # Setting a threshold 0.20 will only plot detection results that have a confidence score greater than 0.20.
    object_categories = ['tree', 'nontree']
    threshold = 0.5

    # Visualize the detections.
    visualize_detection(file_name, detections['prediction'], object_categories, threshold)

In [18]:
file_name = []
file_name.append('data/original_images/judas/n12513613_7251.JPEG')
file_name.append('data/original_images/judas/n12513613_7922.JPEG')
file_name.append('data/original_images/judas/n12513613_4910.JPEG')
file_name.append('data/original_images/fig/n12401684_10111.JPEG')
file_name.append('data/original_images/fig/n12401684_23162.JPEG')
file_name.append('data/original_images/fig/n12401684_2691.JPEG')
file_name.append('data/original_images/palm/n12582231_26277.JPEG')
file_name.append('data/original_images/palm/n12582231_23552.JPEG')
file_name.append('data/original_images/palm/n12582231_10693.JPEG')
file_name.append('data/original_images/pine/n11608250_21659.JPEG')
file_name.append('data/original_images/pine/n11608250_24462.JPEG')
file_name.append('data/original_images/pine/n11608250_11636.JPEG')
file_name.append('data/original_images/china tree/n12741792_12058.JPEG')
file_name.append('data/original_images/china tree/n12741792_4736.JPEG')
file_name.append('data/original_images/china tree/n12741792_693.JPEG')

In [19]:
for file in file_name:
    compute_detections(file)

FileNotFoundError: [Errno 2] No such file or directory: 'data/original_images/judas/n12513613_7251.JPEG'