## Setup
Setup and authenticate the use of AWS services.

In [1]:
%%time
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
sess = sagemaker.Session()



CPU times: user 962 ms, sys: 139 ms, total: 1.1 s
Wall time: 1.25 s


In [19]:
bucket = 'sagemaker-car-detection' # custom bucket name.
prefix = 'car-Detection'

from sagemaker.amazon.amazon_estimator import get_image_uri

training_image = get_image_uri(sess.boto_region_name, 'object-detection', repo_version="latest")


The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


In [3]:
%%bash
#Create folders to store the data and annotation files
mkdir car-generated car-train car-train_annotation car-validation car-validation_annotation

## Getting the data
The model has been trained on Cars Dataset from Stanford

In [5]:
import os
import urllib.request

def download(url):
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)


download('http://ai.stanford.edu/~jkrause/car196/cars_train.tgz')
download('https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz')

In [6]:
%%bash
tar -xzf car_devkit.tgz
tar -xzf cars_train.tgz

## Processing the data
The Amazon SageMaker Object Detection algorithm expects lables to be indexed from 0. It also expects lables to be unique, successive and not skip any integers. For instance, if there are ten classes, the algorithm expects and the labels only be in the set [0,1,2,3,4,5,6,7,8,9].

In the car validation set, the labels do not satistify this requirement. Some indices are skipped and the labels start from 1. We therefore need a mapper that will convert this index system to our requirement.

In [8]:
def get_mapper_fn(map):  
    def mapper(in_category):
        return map[in_category]
    return mapper

import json
import logging

        
def readClasses(matFile):   
    content = sio.loadmat(matFile)
    classes = [(_[0]) for _ in content['class_names'][0]]
    return classes    

def readAnnos(matFile):   
    content = sio.loadmat(matFile)
    return content['annotations'][0]

def get_class_mapper():
    original_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
                     19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
                     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 
                     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 
                     72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 
                     90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 
                     107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 
                     122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 
                     137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 
                     152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 
                     167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 
                     182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196]
    iter_counter = 0
    CLASS = {}
    for orig in original_list:
        CLASS[orig] = iter_counter
        iter_counter += 1
    return CLASS


fix_index_mapping = get_mapper_fn(get_class_mapper())

In [9]:
import os
# get images
images=os.listdir('cars_train')
for fichier in images[:]: # filelist[:] makes a copy of filelist.
    if not(fichier.endswith(".jpg")):
        images.remove(fichier)

## Prepare annotation data

In [11]:
from imageio import imread
import scipy.io as sio

categories = readClasses("devkit/cars_meta.mat")
annotations = readAnnos("devkit/cars_train_annos.mat")

for img in images :
    shape = imread('cars_train/{}'.format(img)).shape
    jsonFile = img.split('.')[0]+'.json'
    
    line = {}
    line['file'] = img
    line['image_size'] = [{
        'width':int(shape[1]),
        'height':int(shape[0]),
        'depth':3
    }]
     
    line['annotations'] = []
    line['categories'] = [] 
    #print(annotations)
    for anno in annotations:
         if(anno[5][0]==img):
            #print(anno) 
            line['annotations'].append({
                    'class_id':int(fix_index_mapping(anno[4][0][0])),
                    'top':int(anno[1][0][0]),
                    'left':int(anno[0][0][0]),
                    'width':abs(int(anno[2][0][0])- int(anno[0][0][0])),
                    'height':abs(int(anno[3][0][0]) -int(anno[1][0][0])),
                })
            class_name = ''
            for ind,cat in enumerate(categories, start=1):
                if int(anno[4][0][0]) == ind:
                    class_name = str(cat)
            assert class_name != ''
            line['categories'].append({
                'class_id':int(anno[4][0][0]),
                'name':class_name
            })
   
    if line['annotations']:
        with open(os.path.join('car-generated', jsonFile),'w') as p:
            json.dump(line,p)      
         

  shape = imread('cars_train/{}'.format(img)).shape


In [12]:
import json
jsons = os.listdir('car-generated')

print ('There are {} images have annotation files.'.format(len(jsons)))

There are 8144 images have annotation files.


In [13]:
import shutil

train_jsons = jsons[:6516]
val_jsons = jsons[6516:]
 
#Moving training files to the training folders
for i in train_jsons:
    image_file = './cars_train/'+i.split('.')[0]+'.jpg'
    if  os.path.exists(image_file):
        shutil.move(image_file, './car-train/')
        shutil.move('./car-generated/'+i, './car-train_annotation/')

#Moving validation files to the validation folders
for i in val_jsons:
    image_file = './cars_train/'+i.split('.')[0]+'.jpg'
    if  os.path.exists(image_file):
        shutil.move(image_file, './car-validation/')
        shutil.move('./car-generated/'+i, './car-validation_annotation/')

## Upload to S3

In [21]:
%%time

train_channel = prefix + '/car-train'
validation_channel = prefix + '/car-validation'
train_annotation_channel = prefix + '/train_annotation'
validation_annotation_channel = prefix + '/validation_annotation'

sess.upload_data(path='car-train', bucket=bucket, key_prefix=train_channel)
sess.upload_data(path='car-validation', bucket=bucket, key_prefix=validation_channel)
sess.upload_data(path='car-train_annotation', bucket=bucket, key_prefix=train_annotation_channel)
sess.upload_data(path='car-validation_annotation', bucket=bucket, key_prefix=validation_annotation_channel)

s3_train_data = 's3://{}/{}'.format(bucket, train_channel)
s3_validation_data = 's3://{}/{}'.format(bucket, validation_channel)
s3_train_annotation = 's3://{}/{}'.format(bucket, train_annotation_channel)
s3_validation_annotation = 's3://{}/{}'.format(bucket, validation_annotation_channel)

CPU times: user 1min 30s, sys: 8.85 s, total: 1min 39s
Wall time: 16min 29s


In [22]:
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

## Train model

In [39]:
od_model = sagemaker.estimator.Estimator(training_image,
                                         role, 
                                         train_instance_count=1, 
                                         train_instance_type='ml.m5.large',
                                         train_volume_size = 50,
                                         train_max_run = 360000,
                                         input_mode = 'File',
                                         output_path=s3_output_location,
                                         sagemaker_session=sess)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [40]:
od_model.set_hyperparameters(base_network='vgg-16',
                             use_pretrained_model=1,
                             num_classes=196,
                             mini_batch_size=16,
                             epochs=10,
                             learning_rate=0.001,
                             lr_scheduler_step='10',
                             lr_scheduler_factor=0.1,
                             optimizer='sgd',
                             momentum=0.9,
                             weight_decay=0.0005,
                             overlap_threshold=0.5,
                             nms_threshold=0.45,
                             image_shape=512,
                             label_width=600,
                             num_training_samples=6516)

In [41]:
train_data = sagemaker.session.s3_input(s3_train_data, distribution='FullyReplicated', 
                        content_type='image/jpeg', s3_data_type='S3Prefix')
validation_data = sagemaker.session.s3_input(s3_validation_data, distribution='FullyReplicated', 
                             content_type='image/jpeg', s3_data_type='S3Prefix')
train_annotation = sagemaker.session.s3_input(s3_train_annotation, distribution='FullyReplicated', 
                             content_type='image/jpeg', s3_data_type='S3Prefix')
validation_annotation = sagemaker.session.s3_input(s3_validation_annotation, distribution='FullyReplicated', 
                             content_type='image/jpeg', s3_data_type='S3Prefix')

data_channels = {'train': train_data, 'validation': validation_data, 
                 'train_annotation': train_annotation, 'validation_annotation':validation_annotation}

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [42]:
od_model.fit(inputs=data_channels, logs=True)

INFO:sagemaker:Creating training-job with name: object-detection-2023-04-10-22-22-19-836


ClientError: An error occurred (ValidationException) when calling the CreateTrainingJob operation: Instance type ml.m5.large is not supported by algorithm object-detection; only GPU instances are supported.

## Hosting
Once the training is done, we can deploy the trained model as an Amazon SageMaker real-time hosted endpoint.

In [None]:
object_detector = od_model.deploy(initial_instance_count = 1,
                                 instance_type = 'ml.p2.8xlarge')