In [3]:
from keras.optimizers import Adam, SGD
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from architecture.mntiny import build_model
from loss_function.keras_ssd_loss import SSDLoss
from layers.keras_layer_AnchorBoxes import AnchorBoxes
from layers.keras_layer_DecodeDetections import DecodeDetections
from layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast

from encoder_decoder.ssd_input_encoder import SSDInputEncoder
from encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from data_generator.data_augmentation_chain_variable_input_size import DataAugmentationVariableInputSize
from data_generator.data_augmentation_chain_constant_input_size import DataAugmentationConstantInputSize
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

import cv2

# Colour triples used when drawing detections; `predict` selects one by class id modulo 3.
# NOTE(review): OpenCV drawing calls normally interpret such triples as BGR - confirm the intended colours.
COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
# Font handed to `cv2.putText` for the detection labels.
FONT = cv2.FONT_HERSHEY_SIMPLEX

# Class names looked up by predicted class id in `predict`; index 0 is the 'background' entry.
classes = ['background', 'car', 'truck', 'pedestrian', 'bicyclist', 'light']

%matplotlib inline

In [4]:
# Configuration shared by the model-building cell (`build_model`) and the
# pre-/post-processing helpers (`img_width` / `img_height` are used to rescale boxes).
img_height = 300 # Height of the input images
img_width = 480 # Width of the input images
img_channels = 3 # Number of color channels of the input images
intensity_mean = 127.5 # Passed to `build_model` as `subtract_mean` (maybe `None`). Together with `intensity_range`, the current settings transform the input pixel values to the interval `[-1,1]`.
intensity_range = 127.5 # Passed to `build_model` as `divide_by_stddev` (maybe `None`). Together with `intensity_mean`, the current settings transform the input pixel values to the interval `[-1,1]`.
n_classes = 5 # Number of positive classes (the `classes` list holds these plus 'background')
scales = [0.08, 0.16, 0.32, 0.64, 0.96] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes; passed to `build_model` as `aspect_ratios_global`
two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1
steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled
normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size

In [3]:
# Restore a complete saved model from disk.
# NOTE(review): the cell that follows clears the session and assigns `model`
# again via `build_model`, so only one of the two cells needs to be run.

# TODO: Set the path to the `.h5` file of the model to be loaded.
model_path = 'mobv2_epoch-64_loss-1.8802_val_loss-2.1250.h5'

# The loader has to resolve the custom loss by name, so create the loss object first.
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

# Drop previously created models from memory before loading.
K.clear_session()

model = load_model(
    model_path,
    custom_objects=dict(AnchorBoxes=AnchorBoxes,
                        compute_loss=ssd_loss.compute_loss)
)

In [5]:
# Step 1 - start from a clean session and construct the network from the
# configuration values defined in the configuration cell.

K.clear_session()

model = build_model(
    image_size=(img_height, img_width, img_channels),
    n_classes=n_classes,
    mode='inference_fast',
    l2_regularization=0.0005,
    scales=scales,
    aspect_ratios_global=aspect_ratios,
    aspect_ratios_per_layer=None,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=clip_boxes,
    variances=variances,
    normalize_coords=normalize_coords,
    subtract_mean=intensity_mean,
    divide_by_stddev=intensity_range
)

# Step 2 (optional) - initialise the layers from a weights file, matching layers by name.

model.load_weights(
    'trained_weights/new_mobv2_epoch-45_val_loss-2.0145.h5',
    by_name=True
)

model.summary()

# Step 3 - create the Adam optimizer and the SSD loss, then compile the model with them.

adam = Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0
)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(loss=ssd_loss.compute_loss, optimizer=adam)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 300, 480, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 150, 240, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
re_lu_1 (ReLU)                  (None, 150, 240, 32) 0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 150, 240, 32) 1056        re_lu_1[0][0]                    
____________________________________

conv2d_14 (Conv2D)              (None, 38, 60, 192)  6336        add_3[0][0]                      
__________________________________________________________________________________________________
re_lu_14 (ReLU)                 (None, 38, 60, 192)  0           conv2d_14[0][0]                  
__________________________________________________________________________________________________
depthwise_conv2d_7 (DepthwiseCo (None, 19, 30, 192)  1920        re_lu_14[0][0]                   
__________________________________________________________________________________________________
re_lu_15 (ReLU)                 (None, 19, 30, 192)  0           depthwise_conv2d_7[0][0]         
__________________________________________________________________________________________________
conv2d_15 (Conv2D)              (None, 19, 30, 64)   12352       re_lu_15[0][0]                   
__________________________________________________________________________________________________
conv2d_16 

add_8 (Add)                     (None, 19, 30, 96)   0           conv2d_27[0][0]                  
                                                                 add_7[0][0]                      
__________________________________________________________________________________________________
conv2d_28 (Conv2D)              (None, 19, 30, 576)  55872       add_8[0][0]                      
__________________________________________________________________________________________________
re_lu_28 (ReLU)                 (None, 19, 30, 576)  0           conv2d_28[0][0]                  
__________________________________________________________________________________________________
depthwise_conv2d_14 (DepthwiseC (None, 10, 15, 576)  5760        re_lu_28[0][0]                   
__________________________________________________________________________________________________
re_lu_29 (ReLU)                 (None, 10, 15, 576)  0           depthwise_conv2d_14[0][0]        
__________

                                                                 classes5_reshape[0][0]           
                                                                 classes6_reshape[0][0]           
                                                                 classes7_reshape[0][0]           
__________________________________________________________________________________________________
boxes4_reshape (Reshape)        (None, 2280, 4)      0           boxes4[0][0]                     
__________________________________________________________________________________________________
boxes5_reshape (Reshape)        (None, 2280, 4)      0           boxes5[0][0]                     
__________________________________________________________________________________________________
boxes6_reshape (Reshape)        (None, 600, 4)       0           boxes6[0][0]                     
__________________________________________________________________________________________________
boxes7_res

In [6]:
def pre_pro_img(frame):
    """Prepare a single video frame for the detector.

    Arguments:
        frame: Image array to process, e.g. the frame returned by
            `cv2.VideoCapture.read()`.

    Returns:
        A tuple `(orig_images, input_images)`. `orig_images` is a one-element
        list holding `frame` itself (not a copy). `input_images` is a NumPy
        array containing one entry: the frame resized to
        `img_width` x `img_height` and converted with `image.img_to_array`.

    Raises:
        ValueError: If `frame` is `None` (for example when the video source
            has no more frames to deliver).
    """
    if frame is None:
        raise ValueError('pre_pro_img() got no frame to process (frame is None).')

    orig_images = [frame]

    # Use the configured input size rather than literal numbers so this stays
    # in sync with the box rescaling in `predict`. `cv2.resize` expects the
    # target size as (width, height).
    resized = cv2.resize(frame, (img_width, img_height))
    input_images = np.array([image.img_to_array(resized)])

    return orig_images, input_images

In [7]:
def predict(orig_images, input_images, confidence_threshold=0.6):
    """Run the model on a preprocessed frame and draw the detections on it.

    Only the first entry of the prediction batch is used. Each prediction row
    is read as: column 0 = class id, column 1 = confidence, last four columns
    = xmin, ymin, xmax, ymax.

    Arguments:
        orig_images: List whose first element is the frame to draw on. The
            frame is modified in place.
        input_images: Batch of preprocessed images passed to `model.predict`.
        confidence_threshold: Rows whose confidence is not strictly greater
            than this value are discarded. Defaults to 0.6.

    Returns:
        `orig_images[0]`, with a box and a text label drawn for every kept row.
    """
    y_pred = model.predict(input_images)

    # Only the first batch entry is drawn, so only that one is thresholded.
    detections = y_pred[0]
    detections = detections[detections[:, 1] > confidence_threshold]

    # NOTE(review): nothing in this function prints arrays; this global NumPy
    # setting is kept only in case other cells rely on it.
    np.set_printoptions(precision=2, suppress=True, linewidth=90)

    frame = orig_images[0]
    frame_height, frame_width = frame.shape[0], frame.shape[1]

    for box in detections:
        class_id = int(box[0])

        # Rescale the box from the model input size (`img_width` x `img_height`)
        # to the dimensions of the original frame.
        xmin = int(box[-4] * frame_width / img_width)
        ymin = int(box[-3] * frame_height / img_height)
        xmax = int(box[-2] * frame_width / img_width)
        ymax = int(box[-1] * frame_height / img_height)

        # Cycle through the available colours by class id.
        color = COLORS[class_id % len(COLORS)]
        label = '{}: {:.2f}'.format(classes[class_id], box[1])
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)

        # Filled rectangle at the box's top-left corner as a background for the label text.
        text_top = (xmin, ymin - 10)
        text_bot = (xmin + 80, ymin + 5)
        text_pos = (xmin + 5, ymin)
        cv2.rectangle(frame, text_top, text_bot, color, -1)
        cv2.putText(frame, label, text_pos, FONT, 0.35, (0, 0, 0), 1)

    return frame

In [8]:
import time

# Run the detector on every frame of the sample video and show the annotated
# frames in an OpenCV window. Press 'q' in the window to stop early.
cap = cv2.VideoCapture('examples/sample3.mp4')

if not cap.isOpened():
    cap.release()
    raise IOError("Could not open video source 'examples/sample3.mp4'.")

try:
    while True:
        ret, frame = cap.read()
        # `ret` is false once no frame could be read (e.g. at the end of the
        # video); stop instead of passing an empty frame to the pipeline.
        if not ret:
            break

        # Time pre-processing plus prediction/drawing for this frame (seconds).
        start_time = time.time()
        orig_images, input_images = pre_pro_img(frame)
        pred_frame = predict(orig_images, input_images)
        print(time.time() - start_time)

        cv2.imshow('frame', pred_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

2.039043426513672
0.02492809295654297
0.02590465545654297
0.02293562889099121
0.02592921257019043
0.023936986923217773
0.02592921257019043
0.024931907653808594
0.02493000030517578
0.025930166244506836
0.025930166244506836
0.025929927825927734
0.023934602737426758
0.024932146072387695
0.02393484115600586
0.024935483932495117
0.022936344146728516
0.02592921257019043
0.023946285247802734
0.02592945098876953
0.025928974151611328
0.023935556411743164
0.025928974151611328
0.023935556411743164
0.026927471160888672
0.026927471160888672
0.028923511505126953
0.029916763305664062
0.02493453025817871
0.03091597557067871
0.025930166244506836
0.02493143081665039
0.027925729751586914
0.027923583984375
0.02891826629638672
0.032050371170043945
0.02792501449584961
0.025928735733032227
0.029920339584350586
0.033907413482666016
0.029918670654296875
0.02991962432861328
0.030915498733520508
0.034905195236206055
0.03390789031982422
0.03690171241760254
0.026926755905151367
0.028923749923706055
0.0309152603149

0.024932384490966797
0.02493119239807129
0.023933887481689453
0.024934053421020508
0.02493119239807129
0.02493000030517578
0.025930404663085938
0.02393174171447754
0.024932146072387695
0.026925325393676758
0.02592635154724121
0.025926828384399414
0.025926828384399414
0.025926828384399414
0.024932146072387695
0.025927305221557617
0.024930715560913086
0.023932695388793945
0.024932146072387695
0.025927066802978516
0.02493000030517578
0.024931669235229492
0.02593064308166504
0.023935317993164062
0.025928497314453125
0.025931835174560547
0.024934768676757812
0.025928258895874023
0.02892017364501953
0.026927471160888672
0.025928735733032227
0.03191113471984863
0.024934768676757812
0.023932933807373047
0.02593398094177246
0.024932146072387695
0.026928186416625977
0.02593064308166504
0.024930715560913086
0.024934053421020508
0.025931119918823242
0.025928258895874023
0.02493143081665039
0.02593374252319336
0.02493143081665039
0.02493143081665039
0.024933576583862305
0.02493429183959961
0.024931