In [1]:
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam, SGD
from keras.utils import plot_model
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt

import time

from Architecture.SSD300 import ssd_300
from Architecture.SSD7 import build_model
from Loss_function.keras_ssd_loss import SSDLoss
from Layers.keras_layer_AnchorBoxes import AnchorBoxes
from Layers.keras_layer_DecodeDetections import DecodeDetections
from Layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from Layers.keras_layer_L2Normalization import L2Normalization

from encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

import cv2
# Palette cycled through when drawing detections; each entry is a 3-tuple of
# channel intensities handed straight to the OpenCV drawing calls.
COLORS = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
]
# Typeface used for the detection labels.
FONT = cv2.FONT_HERSHEY_SIMPLEX

# Human-readable class names, indexed by the integer class ID that the
# detector emits. Index 0 is the background entry.
classes = [
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Height and width (in pixels) of the images fed to the network.
img_height, img_width = 300, 300

In [3]:
# Step 1: construct the SSD300 network.

K.clear_session()  # Free any models left in memory by earlier runs.

# Aspect-ratio sets used by the predictor layers (three- and five-ratio variants).
ratios_3 = [1.0, 2.0, 0.5]
ratios_5 = [1.0, 2.0, 0.5, 3.0, 1.0/3.0]

# All constructor arguments gathered in one place so they are easy to inspect.
ssd_config = dict(
    image_size=(img_height, img_width, 3),
    n_classes=20,
    mode='inference',
    l2_regularization=0.0005,
    # For MS COCO the scales are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05].
    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    aspect_ratios_per_layer=[list(ratios_3),
                             list(ratios_5),
                             list(ratios_5),
                             list(ratios_5),
                             list(ratios_3),
                             list(ratios_3)],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 100, 300],
    offsets=[0.5] * 6,
    clip_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    normalize_coords=True,
    subtract_mean=[123, 117, 104],
    swap_channels=[2, 1, 0],
    confidence_thresh=0.5,
    iou_threshold=0.45,
    top_k=200,
    nms_max_output_size=400,
)

model = ssd_300(**ssd_config)


# Step 2: load the trained weights into the freshly built network.

# TODO: point this at your own trained weights file. Other files that have
# been used with this notebook:
#   'weights/VGG_VOC0712_SSD_300x300_iter_120000.h5'
#   'ssd7_epoch-20_loss-2.3970_val_loss-2.4971.h5'
weights_path = 'ssd300_pascal_07+12_epoch-117_loss-4.1598_val_loss-4.1888.h5'

model.load_weights(weights_path, by_name=True)

# Write an architecture diagram to disk and print the layer table.
plot_model(model, to_file='vis_model.png')
model.summary()

# Step 3: compile the model so that Keras won't complain the next time it is loaded.

# Alternative optimizer:
#   Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 300, 300, 3)  0                                            
__________________________________________________________________________________________________
identity_layer (Lambda)         (None, 300, 300, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
input_mean_normalization (Lambd (None, 300, 300, 3)  0           identity_layer[0][0]             
__________________________________________________________________________________________________
input_channel_swap (Lambda)     (None, 300, 300, 3)  0           input_mean_normalization[0][0]   
__________________________________________________________________________________________________
conv1_1 (C

                                                                 conv7_2_mbox_priorbox_reshape[0][
                                                                 conv8_2_mbox_priorbox_reshape[0][
                                                                 conv9_2_mbox_priorbox_reshape[0][
__________________________________________________________________________________________________
predictions (Concatenate)       (None, 8732, 33)     0           mbox_conf_softmax[0][0]          
                                                                 mbox_loc[0][0]                   
                                                                 mbox_priorbox[0][0]              
__________________________________________________________________________________________________
decoded_predictions (DecodeDete (None, <tf.Tensor 't 0           predictions[0][0]                
Total params: 26,285,486
Trainable params: 26,285,486
Non-trainable params: 0
_______________________________

In [4]:
def pre_pro_img(frame):
    """Turn one captured frame into a model-ready batch.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read`` (OpenCV
            delivers colour frames in BGR channel order).

    Returns:
        tuple: ``(orig_images, input_images)`` where ``orig_images`` is a
        one-element list holding the untouched ``frame`` (used later for
        drawing) and ``input_images`` is a numpy array holding the resized
        RGB copy, with a leading batch axis of size 1.

    Raises:
        ValueError: If ``frame`` is ``None`` (e.g. the camera read failed).
    """
    if frame is None:
        raise ValueError('pre_pro_img() got no frame; did the capture read fail?')

    orig_images = [frame]  # Keep the original frame for visualisation.

    # Resize to the network input size configured for the notebook instead of
    # a hard-coded 300x300. Note that cv2.resize takes (width, height).
    resized = cv2.resize(frame, (img_width, img_height))

    # The model is built with subtract_mean=[123, 117, 104] and
    # swap_channels=[2, 1, 0], i.e. it expects RGB input, while OpenCV
    # frames are BGR. Convert so the mean and channel swap apply correctly.
    resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    input_images = np.array([image.img_to_array(resized)])

    return orig_images, input_images
    

In [5]:
def predict(orig_images, input_images, confidence_threshold=0.5):
    """Run the detector on one frame and draw its detections onto that frame.

    Each prediction row is read as
    ``[class_id, confidence, xmin, ymin, xmax, ymax]`` with the box
    coordinates expressed in the ``img_width`` x ``img_height`` input space.

    Args:
        orig_images: List whose first element is the original frame. It is
            drawn on in place.
        input_images: Batch passed to ``model.predict``; only the predictions
            for its first element are used.
        confidence_threshold: Detections whose confidence is not strictly
            greater than this value are discarded. Defaults to 0.5.

    Returns:
        The original frame (``orig_images[0]``) with a rectangle and a
        ``"<class>: <score>"`` label drawn for every kept detection.
    """
    y_pred = model.predict(input_images)

    frame = orig_images[0]
    frame_h, frame_w = frame.shape[:2]

    # Only the first (and, for live video, only) batch element is visualised.
    first = y_pred[0]
    detections = first[first[:, 1] > confidence_threshold]

    for box in detections:
        class_id = int(box[0])

        # Map the box from the network input size back to the frame size.
        xmin = int(box[2] * frame_w / img_width)
        ymin = int(box[3] * frame_h / img_height)
        xmax = int(box[4] * frame_w / img_width)
        ymax = int(box[5] * frame_h / img_height)

        # Cycle through the palette regardless of how many colours it holds.
        color = COLORS[class_id % len(COLORS)]
        label = '{}: {:.2f}'.format(classes[class_id], box[1])

        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
        cv2.putText(frame, label, (xmin, ymin), FONT, 1, color, 1, cv2.LINE_AA)

    return frame
        

In [6]:
# Open the default camera. To read from a network stream instead, use e.g.
# cap = cv2.VideoCapture('http://192.168.0.186:8080/stream/video.mjpeg')
cap = cv2.VideoCapture(0)

# Frames-per-second bookkeeping: report roughly once per interval.
fps_interval = 1  # seconds
start_time = time.time()
counter = 0

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the video source.')

    # Read frames from OpenCV, run the detector and show the annotated result.
    while True:
        ret, frame = cap.read()
        if not ret:
            # The camera was disconnected or the stream ended; stop cleanly
            # instead of passing a missing frame to the model.
            print('No frame received from the video source; stopping.')
            break

        orig_images, input_images = pre_pro_img(frame)
        pred_frame = predict(orig_images, input_images)

        # Print the FPS after predicting.
        counter += 1
        elapsed = time.time() - start_time
        if elapsed > fps_interval:
            print("FPS: ", counter / elapsed)
            counter = 0
            start_time = time.time()

        # Visualize the predicted frame; press 'q' to quit.
        cv2.imshow('frame', pred_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if prediction fails.
    cap.release()
    cv2.destroyAllWindows()

FPS:  0.28307636487721016
FPS:  16.40508686947331
FPS:  14.995130689621147
FPS:  15.883961566363586
FPS:  16.82674602181952
FPS:  16.86006437279091
FPS:  16.860040452841037
FPS:  16.67857693382782
FPS:  16.565104706325126
FPS:  16.56508931276287
FPS:  16.960706904175787
FPS:  17.123525994949865
FPS:  16.876717153343844
FPS:  16.760557977201014
FPS:  16.59735920513122
FPS:  14.935593585685659
FPS:  14.716387969879788
FPS:  16.358451196511332
FPS:  16.662262073819655
FPS:  16.645979100769534
FPS:  16.646006303256684
FPS:  16.744117803842336
FPS:  16.793590738828637
FPS:  16.860255734833302
FPS:  16.69487295836395
FPS:  16.711294856876346
FPS:  16.744050959769567
FPS:  16.843336996340458
FPS:  16.6785613286492
FPS:  16.926985971879187
FPS:  16.99450575118098
FPS:  16.71147502331417
FPS:  16.744062755743627
FPS:  16.111030572090158
FPS:  13.939897274420115
FPS:  15.590679538390985
FPS:  15.728251708785939
FPS:  15.915465180091363
FPS:  16.500958887593196
FPS:  15.91547272908376
FPS:  15.93