In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import copy
import random
from matplotlib import pyplot as plt
import numpy as np
#From keras
import keras
from keras.models import load_model
# from keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from keras import optimizers,regularizers
from keras.utils import multi_gpu_model
from keras import backend as K
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger

# From ssd_keras
from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
import ssd_encoder_decoder.ssd_output_decoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections
from SSD_loss import SSDLoss
from eval_utils.average_precision_evaluator import Evaluator
from importlib import reload
#import cv2
from detector_help import process_y,post_process,prior_box
import detection_nets,classification_nets
import tensorflow as tf
from matplotlib import pyplot as plt
from drawing import *
from detection_nets import load_mobilenetv2
%matplotlib inline

Using TensorFlow backend.


### Set model config

In [2]:
# Network input geometry: height x width x channels.
input_H, input_W, input_C = 300, 300, 3
input_shape = (input_H, input_W, input_C)

model_name = "ssdlite_mn2"
root_path = "/home/cai/dataset/VOCdevkit"

# Aspect ratios per predictor layer: the first four layers use [2, 3],
# the last two use [2] only.
aspect_ratios = [[2, 3] for _ in range(4)] + [[2] for _ in range(2)]

# Per-channel colour passed to detection_nets.preprocess() and used as the
# augmentation background.
mean_color = [123, 117, 104]
# The color channel order in the original SSD is BGR, so the model reverses
# the channel order of its input images.
swap_channels = [2, 1, 0]
# Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
num_classes = 20
variances = [0.1, 0.1, 0.2, 0.2]
# Alternative tried earlier: scale = [0.07, 0.15, 0.37, 0.54, 0.71, 0.88, 1.05]
scale = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]

### Build model 

In [3]:
# Pick up any edits made to detection_nets.py and drop models from earlier runs.
reload(detection_nets)
K.clear_session()
build_model = detection_nets.build_ssdlite

# The learning rate given here is not tuned: a learning-rate scheduler
# callback is used during training instead.
# The original paper trained with SGD; according to the author of ssd_keras,
# Adam works better.
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

base_model = load_mobilenetv2()
# base_model.summary()

# Number of boxes per feature-map location, one entry per predictor layer.
prior_config = [2 * len(ratios) + 2 for ratios in aspect_ratios]
source_layer = "block_12_expand"
model = build_model(base_model=base_model,
                    prior_config=prior_config,
                    source_layer_name_1=source_layer,
                    num_classes=num_classes)

# Optional: freeze the first layers.
# for layer in model.layers[:120]:
#     layer.trainable = False

# Comment this line out if the summary should not be printed on every build.
model.summary()

# Wrap the detector with the input preprocessing (mean colour, channel swap).
model = detection_nets.preprocess(input_shape, model, mean_color, swap_channels)
# model = multi_gpu_model(model, gpus=2)  # leave commented unless training on multiple GPUs
model.compile(adam,
              loss=ssd_loss.loss,
              metrics=[ssd_loss.class_loss, ssd_loss.loc_loss])
print(model_name, 'has been built ')



Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 300, 300, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 301, 301, 3)  0           input_3[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 150, 150, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 150, 150, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

ssdlite_mn2 has been built 


### Load previously trained models 

In [None]:
# Clear previous models from memory.
K.clear_session()

model_path = "ssdlite_mn2_pascal_07+12_Final.h5"
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

# The saved model refers to the SSD loss and its two metrics by name, so the
# callables are handed to load_model() under those names.
custom_objects = {
    'loss': ssd_loss.loss,
    'class_loss': ssd_loss.class_loss,
    'loc_loss': ssd_loss.loc_loss,
}
model = load_model(model_path, custom_objects=custom_objects)
print("load model from", model_path)

In [None]:
import time

# Inference-speed benchmark on random input: for each batch size, time the
# forward pass and the detection decoding separately over `batch_n` batches.
#
# NOTE: this cell reads `priors`, which is only created further down in the
# "Data Augment" section (prior_box(...)); run that cell first.
batch_n = 100  # number of timed batches per batch size
img_height = input_H
img_width = input_W
img_channels = input_C
# detector = Detect(num_classes,0,cfg)
batch_size_range = [1,2,4,8,16,32]#[1,2,4,8]
print('Start testing...')
# The loop variable is deliberately NOT called `batch_size`: that name is the
# notebook-wide training/evaluation batch size and must not be overwritten here.
for bench_batch_size in batch_size_range:
    time_net = 0.0
    time_post = 0.0
    print('batch size',bench_batch_size)
    # Untimed warm-up call so one-off setup cost of the first predict() for a
    # new batch size does not distort the measurement.
    model.predict(np.random.rand(bench_batch_size,img_height,img_width,img_channels))
    for _ in range(batch_n):
        test_batch = np.random.rand(bench_batch_size,img_height,img_width,img_channels)
        # perf_counter() is monotonic and high-resolution, unlike time.time().
        click = time.perf_counter()
        y_pred = model.predict(test_batch)
        time_net += (time.perf_counter() - click)
        click = time.perf_counter()
        y_pred_decoded = decode_detections(y_pred,
                          priors,variances,
                          img_height = input_H,
                          img_width = input_W,
                          confidence_thresh = 0.5,
                          iou_threshold = 0.45)
        time_post += (time.perf_counter() - click)
    time_total = time_net + time_post
    print('Time cost per batch: %.3f FPS: %.1f'%(time_total / batch_n,batch_n*bench_batch_size/ time_total))
    print('Time(pure forward) cost per batch: %.3f FPS: %.1F'%(time_net / batch_n, batch_n*bench_batch_size/ time_net))

          

### Define feature map sizes (predictor sizes)

In [4]:
# Side length of each of the six predictor feature maps, passed to prior_box()
# to generate the prior boxes.
# Sanity check: with prior_config = [6, 6, 6, 6, 4, 4] boxes per location these
# sizes give 19*19*6 + 10*10*6 + 5*5*6 + 3*3*6 + 3*3*4 + 1*1*4 = 3010, which
# matches the "Number of priors" printed by the data-augmentation cell.
# Earlier configuration, kept for reference:
# feature_map_300 = [38,19,10,5,3,1]
feature_map_300 = [19,10,5,3,3,1]
# Presumably the equivalent sizes for a 224x224 input; not referenced by any
# other cell visible in this notebook.
feature_map_224 = [7,7,4,4,2,1]

In [5]:
# Class names indexed by class id. Index 0 is 'background'; the remaining 20
# entries are the positive classes counted by num_classes.
class_names = ['background',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']

# When True, the dataset cell ignores existing HDF5 files and rebuilds the
# datasets from the VOC source files.
force_create_dataset = False #Force data generator to load dataset from source

## Load Dataset

In [6]:
root_path = '/home/cai/dataset/VOCdevkit'
trainset_hdf5_path = 'dataset_pascal_voc_07+12_trainval.h5'
valset_hdf5_path = 'dataset_pascal_voc_07_test.h5'

# Image directories.
VOC_2007_images_dir = root_path + '/VOC2007/JPEGImages/'
VOC_2012_images_dir = root_path + '/VOC2012/JPEGImages/'

# Annotation directories.
VOC_2007_annotations_dir = root_path + '/VOC2007/Annotations/'
VOC_2012_annotations_dir = root_path + '/VOC2012/Annotations/'

# Image-set list files.
VOC_2007_train_image_set_filename = root_path + '/VOC2007/ImageSets/Main/train.txt'
VOC_2012_train_image_set_filename = root_path + '/VOC2012/ImageSets/Main/train.txt'
VOC_2007_val_image_set_filename = root_path + '/VOC2007/ImageSets/Main/val.txt'
VOC_2012_val_image_set_filename = root_path + '/VOC2012/ImageSets/Main/val.txt'
VOC_2007_trainval_image_set_filename = root_path + '/VOC2007/ImageSets/Main/trainval.txt'
VOC_2012_trainval_image_set_filename = root_path + '/VOC2012/ImageSets/Main/trainval.txt'
VOC_2007_test_image_set_filename = root_path + '/VOC2007/ImageSets/Main/test.txt'

# The cached HDF5 datasets are reused only when both files exist and a rebuild
# has not been requested through force_create_dataset.
hdf5_cache_ready = os.path.isfile(trainset_hdf5_path) and os.path.isfile(valset_hdf5_path)
if force_create_dataset or not hdf5_cache_ready:
    train_dataset = DataGenerator()
    val_dataset = DataGenerator()

    # Training data: VOC2007 + VOC2012 trainval, difficult objects kept.
    train_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir, VOC_2012_images_dir],
        image_set_filenames=[VOC_2007_trainval_image_set_filename,
                             VOC_2012_trainval_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir, VOC_2012_annotations_dir],
        classes=class_names,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    # Validation data: VOC2007 test, difficult objects excluded.
    val_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir],
        image_set_filenames=[VOC_2007_test_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir],
        classes=class_names,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=True,
        ret=False)

    # Write both datasets to HDF5 (training set first) so later runs can load them.
    for dataset, hdf5_path in ((train_dataset, trainset_hdf5_path),
                               (val_dataset, valset_hdf5_path)):
        dataset.create_hdf5_dataset(file_path=hdf5_path,
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
else:
    # Load the datasets from the previously created HDF5 files.
    train_dataset = DataGenerator(hdf5_dataset_path=trainset_hdf5_path)
    val_dataset = DataGenerator(hdf5_dataset_path=valset_hdf5_path)

Loading labels: 100%|██████████| 16551/16551 [00:04<00:00, 3656.43it/s]
Loading image IDs: 100%|██████████| 16551/16551 [00:02<00:00, 7886.31it/s]
Loading evaluation-neutrality annotations: 100%|██████████| 16551/16551 [00:02<00:00, 6276.41it/s]
Loading labels: 100%|██████████| 4952/4952 [00:01<00:00, 3646.93it/s]
Loading image IDs: 100%|██████████| 4952/4952 [00:00<00:00, 8293.43it/s]
Loading evaluation-neutrality annotations: 100%|██████████| 4952/4952 [00:00<00:00, 6149.08it/s]


## Data Augment 

In [7]:
import detector_help
reload(detector_help)
from detector_help import *
import data_augment
reload(data_augment)
from data_augment import *

batch_size = 32

# Training-time augmentation chain; mean_color is used as its background.
ssd_data_augmentation = SSDDataAugmentation(img_height=input_H,
                                            img_width=input_W,
                                            background=mean_color)

# The validation generator only converts to three channels and resizes.
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=input_H, width=input_W)

# Prior boxes are built from the spatial sizes of the predictor layers; the
# encoder turns ground-truth labels into the format needed by the SSD loss.
priors = prior_box(feature_map_300, aspect_ratios, scale=scale, clip=False)
label_encoder = LabelEncoder(num_classes, priors, variances, input_H, input_W)

# Generator handles that are passed to Keras' `fit_generator()`.
train_generator = train_dataset.generate(
    batch_size=batch_size,
    shuffle=True,
    transformations=[ssd_data_augmentation],
    label_encoder=label_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False)

val_generator = val_dataset.generate(
    batch_size=batch_size,
    shuffle=False,
    transformations=[convert_to_3_channels, resize],
    label_encoder=label_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False)

# Report the number of priors and the size of both datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()
for caption, count in (("Number of priors:\t{:>6}", len(priors)),
                       ("Number of images in the training dataset:\t{:>6}", train_dataset_size),
                       ("Number of images in the validation dataset:\t{:>6}", val_dataset_size)):
    print(caption.format(count))

Number of priors:	  3010
Number of images in the training dataset:	 16551
Number of images in the validation dataset:	  4952


## Define callback function

In [10]:
# 1. Output locations.
weights_folder = 'saved_weights/'

# makedirs(exist_ok=True) replaces the separate isdir()/mkdir() pair: it is a
# single call with no gap between the existence check and the creation.
os.makedirs(weights_folder, exist_ok=True)

# The {epoch}, {loss} and {val_loss} placeholders are filled in by
# ModelCheckpoint each time it saves.
checkpoint_filepath = weights_folder + model_name + '_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5'
log_filepath = model_name + '_pascal_07+12_training_log.csv' 
#2. Define lr schedule function
def lr_schedule(epoch):
    """Return the learning rate for the given epoch index.

    Step schedule: 0.0005 for epochs below 70, 0.0001 for epochs below 110,
    and 1e-5 from epoch 110 onwards.
    """
    # (exclusive upper epoch bound, learning rate) in ascending order.
    for upper_bound, rate in ((70, 0.0005), (110, 0.0001)):
        if epoch < upper_bound:
            return rate
    return 1e-5
    
# 3. Callbacks
# Checkpoint on val_loss with period=5 and save_best_only: the full model
# (not only the weights) is written when val_loss has improved.
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=5)

# Append per-epoch metrics to the CSV log so resumed runs keep one history.
csv_logger = CSVLogger(filename=log_filepath, separator=',', append=True)

# Apply lr_schedule at the start of every epoch.
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)

terminate_on_nan = TerminateOnNaN()

callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan]
# callbacks = [csv_logger,learning_rate_scheduler]

## Draw a batch of images(optional)

In [None]:
# Fetch one batch of four untransformed training images with their original
# labels and draw the ground-truth boxes on each.
train_flow_origin = train_dataset.generate(
    batch_size=4,
    transformations=[],
    label_encoder=label_encoder,
    returns=['original_images', 'original_labels'])
images, labels = next(train_flow_origin)
for sample_image, sample_label in zip(images, labels):
    draw_detection(sample_image, sample_label, class_names)

### Debug encoder-decoder and data augment

In [None]:
# Round-trip check: encode ground-truth labels with LabelEncoder, decode them
# again with both decoders, and draw the result next to the processed labels.
# The modules are reloaded first so edits to them take effect without a restart.
import detector_help
reload(detector_help)
import ssd_encoder_decoder.ssd_output_decoder
reload(ssd_encoder_decoder.ssd_output_decoder)
from ssd_encoder_decoder.ssd_output_decoder import decode_detections
from detector_help import LabelEncoder
# Rebuild priors and encoder from the freshly reloaded code.
priors = prior_box(feature_map_300,aspect_ratios,scale = scale,clip = False)
label_encoder = LabelEncoder(num_classes,priors,variances,input_H,input_W)
train_flow_encode = train_dataset.generate(batch_size = 4,
                                           transformations= [convert_to_3_channels,
                                                      resize], 
                                           label_encoder = label_encoder,
                                           shuffle = False,
                                           returns=["processed_images","processed_labels","encoded_labels"])

# NOTE(review): the unpack order (images, encoded, processed) differs from the
# order of the `returns` list above. Presumably the generator emits its outputs
# in a fixed order of its own (other cells pass `returns` as an unordered set)
# -- confirm against DataGenerator.generate().
images,enc_labels,proc_labels = next(train_flow_encode)
# Custom decoder from detector_help.
enc_labels_custom = detector_help.post_process(enc_labels,
                                               priors,
                                               variances,
                                               num_classes,
                                               input_H,input_W, 
                                               score_thresh = 0.06,   
                                               iou_thresh = 0.01,
                                               top_k = 1000) # Use top_k as 1000 because all gt labels`s conf are 1
# Default decoder from ssd_keras.
enc_labels_default = decode_detections(enc_labels,
                  priors,
                  np.array(variances),
                  img_height = input_H,
                  img_width = input_W,
                  confidence_thresh = 0.06,
                  iou_threshold = 0.45)

# NOTE(review): enc_custom_label is unpacked but never drawn; the purple boxes
# below are the DEFAULT decoder output although the commented-out title says
# 'Custom decoder'. Confirm which decoder this cell is meant to visualise.
for img,enc_default_label,enc_custom_label,proc_label in zip(images,enc_labels_default,enc_labels_custom,proc_labels):
#     print(enc_custom_label)
    # Processed ground-truth labels in green.
    draw_detection(img, proc_label, class_names, color = 'green')
    #plt.title('Default decoder')
    #draw_detection(img, enc_default_label, class_names, color = 'red')
   # plt.title('Custom decoder')
    # Encoded-then-decoded labels in purple.
    draw_detection(img, enc_default_label, class_names, color = 'purple')

In [None]:

import time

# Draw a sample of the prior boxes on a blank 300x300 canvas.
base_img = np.zeros((300,300,3))
plt.imshow(base_img)
current_axis = plt.gca()
print(len(priors))
# Every sixth prior, starting at index 4 and stopping before index 6*38.
for k, box in enumerate(priors[4:6*38:6]):
    # The last four entries of a prior are (xmid, ymid, w, h); they are scaled
    # by 300 here (presumably from normalised coordinates to canvas pixels).
    xmid, ymid, w, h = (value * 300 for value in box[-4:])
    color = 'red' if k < 1000 else 'green'
    top_left = (xmid - w/2, ymid - h/2)
    current_axis.add_patch(plt.Rectangle(top_left, w, h, color=color, fill=False, linewidth=2))

### Set training params

In [11]:
# When resuming from saved weights, set initial_epoch to the next epoch to be trained.
initial_epoch, final_epochs = 0, 120
# Batches drawn from the training generator per epoch.
steps_per_epoch = 1000
# Plot (and save) the loss history once training has finished.
plot = True

## Train the model

In [None]:
# Summarise the run configuration before training starts.
print('model name', model_name,
      '\nstart fitting.. ',
      'inital epoch:', initial_epoch,
      'final epoch:', final_epochs,
      'epoch step:', steps_per_epoch,
      'plot_history:', plot,
      '\ntrainset size:', train_dataset.get_dataset_size(), ' batch_size', batch_size)

# Whole validation batches only (floor division drops a trailing partial batch).
validation_steps = val_dataset.get_dataset_size() // batch_size
fit_options = dict(use_multiprocessing=True,
                   steps_per_epoch=steps_per_epoch,
                   validation_data=val_generator,
                   validation_steps=validation_steps,
                   epochs=final_epochs,
                   callbacks=callbacks,
                   initial_epoch=initial_epoch)
history = model.fit_generator(train_generator, **fit_options)

if plot:
    plot_history(history)
    # NOTE(review): the file name carries an 'rfbmn2' prefix rather than
    # model_name ('ssdlite_mn2') -- confirm this is intended.
    plt.savefig('./rfbmn2_300_pascal07+12_train.jpg')



model name ssdlite_mn2 
start fitting..  inital epoch: 0 final epoch: 120 epoch step: 1000 plot_history: True 
trainset size: 16551  batch_size 32
Epoch 1/120

Epoch 00001: LearningRateScheduler setting learning rate to 0.0005.
Epoch 2/120

Epoch 00002: LearningRateScheduler setting learning rate to 0.0005.
Epoch 3/120

Epoch 00003: LearningRateScheduler setting learning rate to 0.0005.
Epoch 4/120

Epoch 00004: LearningRateScheduler setting learning rate to 0.0005.
Epoch 5/120

Epoch 00005: LearningRateScheduler setting learning rate to 0.0005.

Epoch 00005: val_loss improved from inf to 5.53986, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-05_loss-5.5904_val_loss-5.5399.h5
Epoch 6/120

Epoch 00006: LearningRateScheduler setting learning rate to 0.0005.
Epoch 7/120

Epoch 00007: LearningRateScheduler setting learning rate to 0.0005.
Epoch 8/120

Epoch 00008: LearningRateScheduler setting learning rate to 0.0005.
Epoch 9/120

Epoch 00009: LearningRateScheduler setting l

Epoch 29/120

Epoch 00029: LearningRateScheduler setting learning rate to 0.0005.
Epoch 30/120

Epoch 00030: LearningRateScheduler setting learning rate to 0.0005.

Epoch 00030: val_loss improved from 3.46691 to 3.29792, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-30_loss-3.1573_val_loss-3.2979.h5
Epoch 31/120

Epoch 00031: LearningRateScheduler setting learning rate to 0.0005.
Epoch 32/120

Epoch 00032: LearningRateScheduler setting learning rate to 0.0005.
Epoch 33/120

Epoch 00033: LearningRateScheduler setting learning rate to 0.0005.
 147/1000 [===>..........................] - ETA: 12:50 - loss: 3.1400 - class_loss: 2.1607 - loc_loss: 0.9793

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00035: val_loss improved from 3.29792 to 3.24512, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-35_loss-3.0611_val_loss-3.2451.h5
Epoch 36/120

Epoch 00036: LearningRateScheduler setting learning rate to 0.0005.
Epoch 37/120

Epoch 00037: LearningRateScheduler setting learning rate to 0.0005.
Epoch 38/120

Epoch 00038: LearningRateScheduler setting learning rate to 0.0005.
Epoch 39/120

Epoch 00039: LearningRateScheduler setting learning rate to 0.0005.
Epoch 40/120

Epoch 00040: LearningRateScheduler setting learning rate to 0.0005.

Epoch 00040: val_loss improved from 3.24512 to 3.15399, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-40_loss-3.0243_val_loss-3.1540.h5
Epoch 41/120

Epoch 00041: LearningRateScheduler setting learning rate to 0.0005.
Epoch 42/120

Epoch 00042: LearningRateScheduler setting learning rate to 0.0005.
Epoch 43/120

Epoch 00043: LearningRateScheduler setting learning rate to 0.0005.
Epoch 44/120

Epoch 00044: LearningRateS

Epoch 64/120

Epoch 00064: LearningRateScheduler setting learning rate to 0.0005.
Epoch 65/120

Epoch 00065: LearningRateScheduler setting learning rate to 0.0005.

Epoch 00065: val_loss did not improve from 3.09324
Epoch 66/120

Epoch 00066: LearningRateScheduler setting learning rate to 0.0005.
Epoch 67/120

Epoch 00067: LearningRateScheduler setting learning rate to 0.0005.
Epoch 68/120

Epoch 00068: LearningRateScheduler setting learning rate to 0.0005.
Epoch 69/120

Epoch 00069: LearningRateScheduler setting learning rate to 0.0005.
Epoch 70/120

Epoch 00070: LearningRateScheduler setting learning rate to 0.0005.

Epoch 00070: val_loss improved from 3.09324 to 3.05560, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-70_loss-2.7416_val_loss-3.0556.h5
Epoch 71/120

Epoch 00071: LearningRateScheduler setting learning rate to 0.0001.

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00075: val_loss improved from 3.05560 to 2.77791, saving model to saved_weights/ssdlite_mn2_pascal_07+12_epoch-75_loss-2.5217_val_loss-2.7779.h5
Epoch 76/120

Epoch 00076: LearningRateScheduler setting learning rate to 0.0001.
Epoch 77/120

Epoch 00077: LearningRateScheduler setting learning rate to 0.0001.
Epoch 78/120

Epoch 00078: LearningRateScheduler setting learning rate to 0.0001.

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 80/120

Epoch 00080: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00080: val_loss did not improve from 2.77791
Epoch 81/120

Epoch 00081: LearningRateScheduler setting learning rate to 0.0001.
Epoch 82/120

Epoch 00082: LearningRateScheduler setting learning rate to 0.0001.
Epoch 83/120

Epoch 00083: LearningRateScheduler setting learning rate to 0.0001.
Epoch 84/120

Epoch 00084: LearningRateScheduler setting learning rate to 0.0001.
  47/1000 [>.............................] - ETA: 9:24 - loss: 2.4132 - class_loss: 1.6650 - loc_loss: 0.7482

In [None]:
# Print the summary of the model's last layer. Calling .summary() on a layer
# assumes that layer is itself a nested model -- presumably the detector
# wrapped by detection_nets.preprocess(); TODO confirm.
model.layers[-1].summary()

### Evaluate

In [None]:
# Reload the evaluator module so edits to it take effect without restarting
# the kernel, then re-bind Evaluator to the reloaded class.
import eval_utils.average_precision_evaluator
reload(eval_utils.average_precision_evaluator)
from eval_utils.average_precision_evaluator import Evaluator

In [None]:
# Build the evaluation dataset directly from the VOC2007 test annotations.
# Unlike val_dataset (same image set, exclude_difficult=True), objects marked
# 'difficult' are kept here.
test_dataset = DataGenerator()
test_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                      image_set_filenames=[VOC_2007_test_image_set_filename],
                      annotations_dirs=[VOC_2007_annotations_dir],
                      classes=class_names,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=False,
                      ret=False)
print('Test size', test_dataset.get_dataset_size())

In [None]:
# The evaluator is given the raw model output and does the decoding itself
# from the priors and variances passed in the options below.
evaluator = Evaluator(model=model,
                      n_classes=num_classes,
                      data_generator=test_dataset,
                      model_mode='training')

# 11-point sampled average precision at IoU 0.5.
eval_options = dict(
    img_height=input_H,
    img_width=input_W,
    batch_size=batch_size,
    priors=priors,
    variances=variances,
    data_generator_mode='resize',
    round_confidences=False,
    matching_iou_threshold=0.5,
    border_pixels='include',
    sorting_algorithm='quicksort',
    average_precision_mode='sample',
    num_recall_points=11,
    ignore_neutral_boxes=True,
    return_precisions=True,
    return_recalls=True,
    return_average_precisions=True,
    verbose=True)
results = evaluator(**eval_options)

mean_average_precision, average_precisions, precisions, recalls = results

In [None]:
# One row per positive class (index 0 is skipped), followed by the mAP.
row_template = "{:<14}{:<6}{}"
for class_id in range(1, len(average_precisions)):
    print(row_template.format(class_names[class_id], 'AP', round(average_precisions[class_id], 3)))
print()
print(row_template.format('', 'mAP', round(mean_average_precision, 3)))

In [None]:
# Grid of precision-recall curves: n columns, one panel per positive class.
n = 2
m = max((num_classes + 1) // 2, 2)

fig, cells = plt.subplots(m, n, figsize=(n*8, m*8))
for i in range(m):
    for j in range(n):
        class_id = n*i + j + 1
        if class_id > num_classes:
            break
        panel = cells[i, j]
        panel.plot(recalls[class_id], precisions[class_id], color='blue', linewidth=1.0)
        panel.set_xlabel('recall', fontsize=14)
        panel.set_ylabel('precision', fontsize=14)
        panel.grid(True)
        panel.set_xticks(np.linspace(0,1,11))
        panel.set_yticks(np.linspace(0,1,11))
        panel.set_title("{}, AP: {:.3f}".format(class_names[class_id], average_precisions[class_id]), fontsize=16)

### Predict

In [None]:
# 1: Set the generator for the predictions.
# Single shuffled validation images, resized to the network input size. No
# label encoder is used; the generator also returns the inverse transform, the
# original image and the original labels for each item.

predict_generator = val_dataset.generate(batch_size=1,
                                         shuffle=True,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=None,
                                         returns={'processed_images',
                                                  'inverse_transform',
                                                  'original_images',
                                                  'original_labels'},
                                         keep_images_without_gt=False)

In [None]:

# 2: Draw one batch from the prediction generator.
# NOTE(review): `returns` was passed as a set, so this unpack order relies on
# the generator emitting its outputs in a fixed order -- confirm against
# DataGenerator.generate().
batch_images, batch_inverse_transforms, batch_original_images, batch_original_labels = next(predict_generator)

i = 0 # Which batch item to look at

#print("Image:", batch_filenames[i])
print()
print("Ground truth boxes:\n")
print(np.array(batch_original_labels[i]))


# 3: Make predictions.
y_pred = model.predict(batch_images)

In [None]:
# 4: Decode labels
# Decode the raw predictions into boxes using the priors and variances, with
# the confidence and IoU thresholds given below, then map the boxes back onto
# the original image through the recorded inverse transforms.
reload(detector_help)
y_pred_decoded = decode_detections(y_pred,
                  priors,variances,
                  img_height = input_H,
                  img_width = input_W,
                  confidence_thresh = 0.5,
                  iou_threshold = 0.45)
# Alternative: the custom decoder from detector_help (currently disabled).
# y_pred_decoded = detector_help.post_process(y_pred, 
#                                             priors,
#                                             num_classes,
#                                             input_H, 
#                                             input_W,
#                                             score_thresh = 0.5,
#                                             iou_thresh = 0.4)
y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')
print(y_pred_decoded_inv[i])

In [None]:
# 5: Draw 
# One colour per class index (background included), sampled from the HSV colormap.
colors = plt.cm.hsv(np.linspace(0, 1, num_classes+1)).tolist()
# First the predictions (with scores, class-coloured), then the ground truth,
# both drawn on the original image.
draw_detection(batch_original_images[i],y_pred_decoded_inv[i],class_names,
               show = True, draw_score = True,use_cm = True,color = colors, size = 'medium')
draw_detection(batch_original_images[i],batch_original_labels[i],class_names, size = 'medium')


### Image Demo

In [None]:
# The notebook-level `import cv2` is commented out, so import it where it is used.
import cv2

# Single-image demo, following the same pipeline as the "Predict" section:
# unscaled RGB image at the network input size -> model -> decode_detections.
img_file = "data/dog3.jpg"
img = cv2.imread(img_file)
if img is None:
    # cv2.imread() returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image: " + img_file)
# OpenCV loads images as BGR. The model reverses the channel order itself
# (swap_channels), so it is presumably meant to receive RGB -- confirm.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Resize to the size the model was built for (input_shape), not 224x224.
# cv2.resize takes dsize as (width, height).
img = cv2.resize(img, dsize=(input_W, input_H), interpolation=cv2.INTER_CUBIC)
# No division by 255: the Predict section feeds unscaled generator images, and
# mean_color is expressed on the 0-255 scale.
x = np.array(img, dtype='float')
y_pred = model.predict(np.expand_dims(x, 0))
# Decode exactly as in the Predict section. The earlier post_process(...) call
# here did not match the argument list used by the other post_process call in
# this notebook.
y_pred = decode_detections(y_pred,
                           priors, variances,
                           img_height=input_H,
                           img_width=input_W,
                           confidence_thresh=0.5,
                           iou_threshold=0.45)
print(y_pred)
draw_detection(img, y_pred[0], class_names)

### Video detection demo

In [None]:
# The notebook-level `import cv2` is commented out, so import it where it is used.
import cv2

# Video demo: run the detector on every frame, draw the detections and write
# the frames to `result_file`; every 50th frame is also shown inline.
video_file = "data/cat_video.mp4"
result_file = "res.avi"
cap = cv2.VideoCapture(video_file)
writer = None
try:
    if not cap.isOpened():
        raise IOError("Could not open video: " + video_file)
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    frames = (int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
    print('Input Video Size:',size,' Fps:',fps,' Frames:',frames)
    writer = cv2.VideoWriter(result_file,cv2.VideoWriter_fourcc(*'MJPG'), fps, size)
    # decode_detections() returns boxes in network-input pixels
    # (img_width x img_height); these factors map (xmin, ymin, xmax, ymax)
    # onto the original frame.
    box_scale = np.array([size[0] / input_W, size[1] / input_H] * 2)
    success,frame = cap.read()
    count = 1
    while success:
        # OpenCV frames are BGR; the model presumably expects RGB (it swaps
        # channels itself) -- confirm. cv2.resize takes dsize as (width, height).
        x = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        x = cv2.resize(x,dsize=(input_W,input_H),interpolation=cv2.INTER_CUBIC)
        # No division by 255, matching the Predict section's unscaled input.
        raw_pred = model.predict(np.expand_dims(x,0))
        # Decode as in the Predict section; the raw network output cannot be
        # drawn directly.
        y_pred = np.array(decode_detections(raw_pred,
                                            priors, variances,
                                            img_height=input_H,
                                            img_width=input_W,
                                            confidence_thresh=0.5,
                                            iou_threshold=0.45)[0], dtype='float')
        if y_pred.size:
            # Scale the four box COLUMNS (not the last four rows) to frame pixels.
            y_pred[:, -4:] *= box_scale
        # NOTE(review): assumes draw_detection() draws onto `frame` in place so
        # the written video is annotated -- confirm in drawing.py.
        draw_detection(frame, y_pred,class_names,font_scale = 1)
        writer.write(frame)
        count += 1
        if count%50==0:
            print(y_pred)
            # Convert to RGB for matplotlib, which would otherwise show swapped colours.
            plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            plt.show()
            print('.',end='')
        success,frame = cap.read()
finally:
    # Always release the handles: an unreleased VideoWriter can leave the
    # output file incomplete, and the capture keeps the input file open.
    cap.release()
    if writer is not None:
        writer.release()
print('done')