In [1]:
# import training libraries
from math import ceil

from keras import backend as K
from keras.models import load_model
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_patch_sampling_ops import RandomMaxCropFixedAR
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation


Using TensorFlow backend.


# TRANSFER LEARNING
[INDEX](#INDEX "Goto INDEX")
## Training from pretrained weights
Pretrained Weights- Trained on pascal-voc data<Br>
New Training- Training for aerial drone data

## 1. Modify model weights structure of classification layers for training on new objects and save

In [2]:
import h5py
import numpy as np
import shutil

from misc_utils.tensor_sampling_utils import sample_tensors

In [2]:
# load weights of the pretrained model
weights_path = 'pretrained_weights/VGG_VOC0712_SSD_300x300_iter_120000_car.h5'
weights_to_be_trained = h5py.File(weights_path, mode='r+')

In [6]:
# classifier layer names in the model, first 3 have same number while other 3 have same number
classifier_names1 = ['conv4_3_norm_mbox_conf',
                    'conv8_2_mbox_conf',
                    'conv9_2_mbox_conf']

classifier_names2 = ['fc7_mbox_conf',
                    'conv6_2_mbox_conf',
                    'conv7_2_mbox_conf']

In [7]:
# check current shape of one of the classifier layers
conv4_3_norm_mbox_conf_kernel = weights_to_be_trained[classifier_names1[0]][classifier_names1[0]]['kernel:0']
conv4_3_norm_mbox_conf_bias = weights_to_be_trained[classifier_names1[0]][classifier_names1[0]]['bias:0']
# 20 classes in the voc dataset with 1 background class
# 4 boxes for each class and hence 21*4 = 84 predictions
conv4_3_norm_mbox_conf_kernel, conv4_3_norm_mbox_conf_bias

(<HDF5 dataset "kernel:0": shape (3, 3, 512, 84), type "<f4">,
 <HDF5 dataset "bias:0": shape (84,), type "<f4">)

In [8]:
# TODO: Modify the n_classes number as per the new object detection model to be trained
# Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
n_classes = 5 # set new number of classes to train on

for name in classifier_names1:
    # Get the trained weights for this layer from the source HDF5 weights file.
    kernel = weights_to_be_trained[name][name]['kernel:0'].value
    bias = weights_to_be_trained[name][name]['bias:0'].value

    # Get the shape of the kernel. We're interested in sub-sampling
    height, width, in_channels, out_channels = kernel.shape
    
    # Sub-sample the kernel and bias.
    new_kernel, new_bias = sample_tensors(weights_list=[kernel, bias],
                                          sampling_instructions=[height, width, in_channels, (n_classes+1)*4],
                                          axes=[[3]], # The one bias dimension corresponds to the last kernel dimension.
                                          init=['gaussian', 'zeros'],
                                          mean=0.0,
                                          stddev=0.005)
    
    # Delete the old weights from the destination file.
    del weights_to_be_trained[name][name]['kernel:0']
    del weights_to_be_trained[name][name]['bias:0']
    # Create new datasets for the sub-sampled weights.
    weights_to_be_trained[name][name].create_dataset(name='kernel:0', data=new_kernel)
    weights_to_be_trained[name][name].create_dataset(name='bias:0', data=new_bias)

for name in classifier_names2:
    # Get the trained weights for this layer from the source HDF5 weights file.
    kernel = weights_to_be_trained[name][name]['kernel:0'].value
    bias = weights_to_be_trained[name][name]['bias:0'].value

    # Get the shape of the kernel. We're interested in sub-sampling
    height, width, in_channels, out_channels = kernel.shape
    
    # Sub-sample the kernel and bias.
    new_kernel, new_bias = sample_tensors(weights_list=[kernel, bias],
                                          sampling_instructions=[height, width, in_channels, (n_classes+1)*6],
                                          axes=[[3]], # The one bias dimension corresponds to the last kernel dimension.
                                          init=['gaussian', 'zeros'],
                                          mean=0.0,
                                          stddev=0.005)
    
    # Delete the old weights from the destination file.
    del weights_to_be_trained[name][name]['kernel:0']
    del weights_to_be_trained[name][name]['bias:0']
    # Create new datasets for the sub-sampled weights.
    weights_to_be_trained[name][name].create_dataset(name='kernel:0', data=new_kernel)
    weights_to_be_trained[name][name].create_dataset(name='bias:0', data=new_bias)

# Make sure all data is written to our output file before this sub-routine exits.
weights_to_be_trained.flush()

  import sys
  


## 2. Set the model configuration parameters

In [3]:
# TODO: Modify the n_classes number as per the new object detection model to be trained
# set model parameters
img_height = 300 # Height of the input images
img_width = 300 # Width of the input images
img_channels = 3 # Number of color channels of the input images
mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. 
n_classes = 5 # set new number of classes to train on
# Do not change this value if you're using any of the pre-trained weights.
swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we should set this to `True`, but weirdly the results are better without swapping.
scales = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets.
# scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets.
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are scaled as in the original implementation
normalize_coords = True

## 3. Build the model using modified pretrained weights above

In [4]:
# 1: Build the Keras model.
K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

# 2: Load pretrained weights
model.load_weights(weights_path, by_name=True)

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)


W0710 16:07:55.572596  3200 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:95: The name tf.reset_default_graph is deprecated. Please use tf.compat.v1.reset_default_graph instead.

W0710 16:07:55.580714  3200 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:98: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0710 16:07:55.587980  3200 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:102: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0710 16:07:55.589063  3200 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:51

### use repo https://github.com/tzutalin/labelImg to download labelImg software and generate custom dataset for training

## 4. Set up the data generators for the training

In [5]:
# data generation

# 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
# If you have enough memory, consider loading the images into memory
train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)

# 2: Parse the image and label lists for the training and validation datasets. This can take a while.
# The directories that contain the images.
train_images_dir      = 'train/images/'
val_images_dir      = 'val/images/'

# The directories that contain the annotations.
train_annotations_dir      = 'train/annotations/'
val_annotations_dir      = 'val/annotations/'

# The directories that contain the imageset
train_image_set_filename = 'train/train.txt'
val_image_set_filename   = 'val/val.txt'

# The XML parser needs to now what object class names to look for and in which order to map them to integers.
classes = ['background',
           'car', 'truck', 'bus', 'minibus', 'cyclist']

train_dataset.parse_xml(images_dirs=[train_images_dir],
                        image_set_filenames=[train_image_set_filename],
                        annotations_dirs=[train_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)

val_dataset.parse_xml(images_dirs=[val_images_dir],
                      image_set_filenames=[val_image_set_filename],
                      annotations_dirs=[val_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)

# Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
# speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
# option in the constructor, because in that cas the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.

'''train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                  resize=False,
                                  variable_image_size=True,
                                  verbose=True)

val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)'''

Processing image set 'train.txt': 100%|██████████████████████████████████████████████| 205/205 [00:05<00:00, 39.88it/s]
Loading images into memory: 100%|████████████████████████████████████████████████████| 205/205 [00:24<00:00,  8.45it/s]
Processing image set 'val.txt': 100%|██████████████████████████████████████████████████| 50/50 [00:01<00:00, 26.67it/s]
Loading images into memory: 100%|██████████████████████████████████████████████████████| 50/50 [00:12<00:00,  3.86it/s]


"train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',\n                                  resize=False,\n                                  variable_image_size=True,\n                                  verbose=True)\n\nval_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',\n                                resize=False,\n                                variable_image_size=True,\n                                verbose=True)"

In [12]:
# 3: Set the batch size.

batch_size = 16 # Change the batch size if you like, or if you run into GPU memory issues.

# 4: Set the image transformations for pre-processing and data augmentation options.

# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)

# For the validation generator:
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)

# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                   model.get_layer('fc7_mbox_conf').output_shape[1:3],
                   model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)

# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[ssd_data_augmentation],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)

# Get the number of samples in the training and validations datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size   = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

Number of images in the training dataset:	    94
Number of images in the validation dataset:	     8


## 5. Set the remaining training parameters

In [13]:
# Define a learning rate schedule.

def lr_schedule(epoch):
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001

In [14]:
# Define model callbacks.

# TODO: Set the filepath under which you want to save the model.
model_checkpoint = ModelCheckpoint(filepath='ssd300_aerial_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)

csv_logger = CSVLogger(filename='ssd300_aerial_training_log.csv',
                       separator=',',
                       append=True)

learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                verbose=1)

terminate_on_nan = TerminateOnNaN()

callbacks = [model_checkpoint,
             csv_logger,
             learning_rate_scheduler,
             terminate_on_nan]


## 6. Train

In [15]:
# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
initial_epoch   = 0
final_epoch     = 10
steps_per_epoch = ceil(train_dataset_size/batch_size)

history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=steps_per_epoch,
                              epochs=final_epoch,
                              callbacks=callbacks,
                              validation_data=val_generator,
                              validation_steps=ceil(val_dataset_size/batch_size),
                              initial_epoch=initial_epoch)

W0710 13:06:25.508057 17416 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

W0710 13:06:26.909329 17416 module_wrapper.py:139] From C:\Users\abhinav.jhanwar\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.



Epoch 1/10

Epoch 00001: LearningRateScheduler setting learning rate to 0.001.

KeyboardInterrupt: 