In [0]:
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt

# from google.colab import drive
# drive.mount("/content/drive/", force_remount=True)
# %cd "drive/My Drive/Colab Notebooks"

# !git clone https://github.com/pierluigiferrari/ssd_keras.git

# Switch into the cloned ssd_keras checkout and list its contents as a sanity
# check. The project-local imports that follow (models, keras_layers,
# data_generator, ...) are presumably resolved relative to this directory.
%cd /content/ssd_keras
!ls

from models.keras_ssd512 import ssd_512
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

from eval_utils.average_precision_evaluator import Evaluator


%matplotlib inline

/content/ssd_keras
bounding_box_utils   ssd300_evaluation_COCO.ipynb
CONTRIBUTING.md      ssd300_evaluation.ipynb
data_generator	     ssd300_inference.ipynb
eval_utils	     ssd300_training.ipynb
examples	     ssd512_inference.ipynb
__init__.py	     ssd7_training.ipynb
ISSUE_TEMPLATE.md    ssd_encoder_decoder
keras_layers	     training_summaries
keras_loss_function  VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5
LICENSE.txt	     VOCdevkit
misc_utils	     VOCtest_06-Nov-2007.tar
models		     weight_sampling_tutorial.ipynb
README.md


In [0]:

# Set the image size.
img_height = 512
img_width = 512
!pwd

/content/ssd_keras


In [0]:
# 1: Build the Keras model

# Clear previous models from memory before building a new one.
K.clear_session()

# Anchor-box aspect ratios per layer: the first layer and the last two layers
# use three ratios, the four layers in between use five.
ratios_3 = (1.0, 2.0, 0.5)
ratios_5 = ratios_3 + (3.0, 1.0/3.0)
layer_ratios = (ratios_3, ratios_5, ratios_5, ratios_5, ratios_5, ratios_3, ratios_3)

# Alternative scale set kept for reference:
#     scales=[0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
# NOTE(review): the original trailing comment on that line was cut off
# ("The scales for MS COCO are"); confirm which dataset each scale set is
# meant for and that the active scales match the loaded weights.
ssd_config = dict(
    image_size=(img_height, img_width, 3),
    n_classes=20,
    mode='inference',
    l2_regularization=0.0005,
    scales=[0.04, 0.1, 0.26, 0.42, 0.58, 0.70, 0.8, 1.0],
    aspect_ratios_per_layer=[list(ratios) for ratios in layer_ratios],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 128, 256, 512],
    offsets=[0.5] * 7,
    clip_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    normalize_coords=True,
    subtract_mean=[123, 117, 104],
    swap_channels=[2, 1, 0],
    confidence_thresh=0.5,
    iou_threshold=0.45,
    top_k=200,
    nms_max_output_size=400,
)
model = ssd_512(**ssd_config)

# 2: Load the trained weights into the model.

# TODO: Set the path of the trained weights.
# NOTE(review): the listing of /content/ssd_keras shows
# 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5', which is a different file
# from the one referenced here; confirm that this path exists and is the
# intended checkpoint.
weights_path = '/../content/VGG_VOC0712_SSD_512x512_ft_iter_120000.h5'
model.load_weights(weights_path, by_name=True)

# 3: Compile the model so that Keras won't complain the next time you load it.

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

model.compile(loss=ssd_loss.compute_loss, optimizer=adam)

In [0]:
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar


In [0]:
!tar -xvf VOCtest_06-Nov-2007.tar

In [0]:
# The XML parser needs to know what object class names to look for and in which
# order to map them to integers.
classes = [
    'background',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# TODO: Set the paths to the dataset here.
Pascal_VOC_dataset_images_dir = 'VOCdevkit/VOC2007/JPEGImages/'
Pascal_VOC_dataset_annotations_dir = 'VOCdevkit/VOC2007/Annotations/'
Pascal_VOC_dataset_image_set_filename = 'VOCdevkit/VOC2007/ImageSets/Main/test.txt'

# Parse the annotations of the chosen image set into the generator; nothing is
# returned (ret=False), the parsed data stays inside `dataset`.
dataset = DataGenerator()
dataset.parse_xml(
    classes=classes,
    include_classes='all',
    images_dirs=[Pascal_VOC_dataset_images_dir],
    annotations_dirs=[Pascal_VOC_dataset_annotations_dir],
    image_set_filenames=[Pascal_VOC_dataset_image_set_filename],
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False,
)


Processing image set 'test.txt':   0%|          | 0/4952 [00:00<?, ?it/s][A
Processing image set 'test.txt':   1%|          | 35/4952 [00:00<00:14, 347.89it/s][A
Processing image set 'test.txt':   1%|▏         | 66/4952 [00:00<00:14, 326.01it/s][A
Processing image set 'test.txt':   2%|▏         | 96/4952 [00:00<00:15, 314.57it/s][A
Processing image set 'test.txt':   3%|▎         | 130/4952 [00:00<00:15, 320.18it/s][A
Processing image set 'test.txt':   3%|▎         | 161/4952 [00:00<00:15, 316.29it/s][A
Processing image set 'test.txt':   4%|▍         | 199/4952 [00:00<00:14, 332.20it/s][A
Processing image set 'test.txt':   5%|▍         | 240/4952 [00:00<00:13, 345.41it/s][A
Processing image set 'test.txt':   6%|▌         | 273/4952 [00:00<00:13, 334.31it/s][A
Processing image set 'test.txt':   6%|▌         | 307/4952 [00:00<00:13, 335.10it/s][A
Processing image set 'test.txt':   7%|▋         | 350/4952 [00:01<00:12, 357.63it/s][A
Processing image set 'test.txt':   8%|▊      

In [0]:
n_classes = 20
model_mode = 'inference'

# Bind the compiled model and the parsed dataset to an evaluator.
evaluator = Evaluator(
    model=model,
    n_classes=n_classes,
    data_generator=dataset,
    model_mode=model_mode,
)

# Options for the full evaluation run. The three `return_*` flags are all
# enabled, and the result is unpacked into four values below.
evaluation_options = dict(
    img_height=img_height,
    img_width=img_width,
    batch_size=8,
    data_generator_mode='resize',
    round_confidences=False,
    matching_iou_threshold=0.5,
    border_pixels='include',
    sorting_algorithm='quicksort',
    average_precision_mode='sample',
    num_recall_points=11,
    ignore_neutral_boxes=True,
    return_precisions=True,
    return_recalls=True,
    return_average_precisions=True,
    verbose=True,
)
results = evaluator(**evaluation_options)

mean_average_precision, average_precisions, precisions, recalls = results

Number of images in the evaluation dataset: 4952


  0%|          | 0/619 [00:00<?, ?it/s][A
Producing predictions batch-wise:   0%|          | 0/619 [00:00<?, ?it/s][A
Producing predictions batch-wise:   0%|          | 1/619 [00:01<14:43,  1.43s/it][A
Producing predictions batch-wise:   0%|          | 2/619 [00:02<12:54,  1.25s/it][A
Producing predictions batch-wise:   0%|          | 3/619 [00:03<11:37,  1.13s/it][A
Producing predictions batch-wise:   1%|          | 4/619 [00:03<10:45,  1.05s/it][A
Producing predictions batch-wise:   1%|          | 5/619 [00:04<10:06,  1.01it/s][A
Producing predictions batch-wise:   1%|          | 6/619 [00:05<09:40,  1.06it/s][A
Producing predictions batch-wise:   1%|          | 7/619 [00:06<09:21,  1.09it/s][A
Producing predictions batch-wise:   1%|▏         | 8/619 [00:07<09:07,  1.12it/s][A
Producing predictions batch-wise:   1%|▏         | 9/619 [00:08<08:58,  1.13it/s][A
Producing predictions batch-wise:   2%|▏         | 10/619 [00:09

In [0]:
# Print one aligned row per class (index 0, 'background', is skipped),
# followed by a blank line and the mean average precision.
row_template = "{:<14}{:<6}{}"

for class_id, class_ap in enumerate(average_precisions[1:], start=1):
    print(row_template.format(classes[class_id], 'AP', round(class_ap, 3)))
print()
print(row_template.format('', 'mAP', round(mean_average_precision, 3)))

aeroplane     AP    0.717
bicycle       AP    0.811
bird          AP    0.716
boat          AP    0.613
bottle        AP    0.422
bus           AP    0.806
car           AP    0.809
cat           AP    0.812
chair         AP    0.582
cow           AP    0.804
diningtable   AP    0.683
dog           AP    0.797
horse         AP    0.813
motorbike     AP    0.81
person        AP    0.71
pottedplant   AP    0.474
sheep         AP    0.802
sofa          AP    0.776
train         AP    0.8
tvmonitor     AP    0.789

              mAP   0.727


In [0]:
# Run the evaluation stages one by one on the existing `evaluator`, this time
# computing average precision with mode='integrate' instead of the 'sample'
# mode used in the earlier evaluator(...) call.
# NOTE(review): no prediction step is invoked here, so this presumably reuses
# the predictions produced by that earlier call - confirm.

# Stage 1: count the ground-truth boxes per class (stored on the evaluator).
evaluator.get_num_gt_per_class(
    ret=False,
    verbose=False,
    ignore_neutral_boxes=True,
)

# Stage 2: match predictions against ground truth.
evaluator.match_predictions(
    ret=False,
    verbose=True,
    ignore_neutral_boxes=True,
    matching_iou_threshold=0.5,
    border_pixels='include',
    sorting_algorithm='quicksort',
)

# Stage 3: precision/recall, then per-class AP and the overall mean.
precisions, recalls = evaluator.compute_precision_recall(ret=True, verbose=True)

# NOTE(review): num_recall_points is presumably only relevant for
# mode='sample' - verify against the Evaluator documentation.
average_precisions = evaluator.compute_average_precisions(
    ret=True,
    verbose=True,
    mode='integrate',
    num_recall_points=11,
)

mean_average_precision = evaluator.compute_mean_average_precision(ret=True)


  0%|          | 0/243 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 1/20.:   0%|          | 0/243 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 1/20.: 100%|██████████| 243/243 [00:00<00:00, 9738.91it/s][A
  0%|          | 0/319 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 2/20.:   0%|          | 0/319 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 2/20.: 100%|██████████| 319/319 [00:00<00:00, 9920.17it/s][A
  0%|          | 0/417 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 3/20.:   0%|          | 0/417 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 3/20.: 100%|██████████| 417/417 [00:00<00:00, 10481.55it/s][A
  0%|          | 0/229 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 4/20.:   0%|          | 0/229 [00:00<?, ?it/s][A
Matching predictions to ground truth, class 4/20.: 100%|██████████| 229/229 [00:00<00:00, 9711.59it/s][A
  0%|          | 0/268 [00

In [0]:
# Print one aligned row per class (index 0, 'background', is skipped),
# followed by a blank line and the mean average precision.
row_template = "{:<14}{:<6}{}"

for class_id, class_ap in enumerate(average_precisions[1:], start=1):
    print(row_template.format(classes[class_id], 'AP', round(class_ap, 3)))
print()
print(row_template.format('', 'mAP', round(mean_average_precision, 3)))

aeroplane     AP    0.766
bicycle       AP    0.852
bird          AP    0.773
boat          AP    0.655
bottle        AP    0.425
bus           AP    0.866
car           AP    0.874
cat           AP    0.882
chair         AP    0.585
cow           AP    0.851
diningtable   AP    0.717
dog           AP    0.832
horse         AP    0.878
motorbike     AP    0.835
person        AP    0.735
pottedplant   AP    0.462
sheep         AP    0.8
sofa          AP    0.823
train         AP    0.856
tvmonitor     AP    0.776

              mAP   0.762
