In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

%matplotlib inline


#from keras_ssd7 import build_model 
from keras_resnet import build_model
from keras_ssd_loss import SSDLoss
from ssd_box_encode_decode_utils import SSDBoxEncoder, decode_y, decode_y2
from ssd_batch_generator import BatchGenerator
import cv2
import imageio
import timeit
import time
imageio.plugins.ffmpeg.download()


In [None]:


### Set up the model

# 1: Set some necessary parameters

# --- Input images ---
img_height, img_width, img_channels = 300, 480, 3 # Height, width and number of color channels of the input images
n_classes = 6 # Number of classes including the background class

# --- Anchor boxes ---
min_scale, max_scale = 0.08, 0.96 # Scaling factors for the smallest and the largest anchor boxes
# An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
scales = [0.08, 0.16, 0.32, 0.64, 0.96]
# The list of aspect ratios for the anchor boxes
aspect_ratios = [1.0/3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0]
two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1
limit_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries

# --- Box coordinate encoding ---
variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled
coords = 'centroids' # Whether the box coordinates to be used should be in the 'centroids' or 'minmax' format, see documentation
normalize_coords = False # Whether or not the model is supposed to use relative coordinates that are within [0,1]

# 2: Build the Keras model (and possibly load some trained weights)

K.clear_session() # Clear previous models from memory.

# Every setting above is handed to `build_model` by keyword; the same aspect
# ratios are used globally (no per-layer override).
build_model_kwargs = dict(
    image_size=(img_height, img_width, img_channels),
    n_classes=n_classes,
    min_scale=min_scale,
    max_scale=max_scale,
    scales=scales,
    aspect_ratios_global=aspect_ratios,
    aspect_ratios_per_layer=None,
    two_boxes_for_ar1=two_boxes_for_ar1,
    limit_boxes=limit_boxes,
    variances=variances,
    coords=coords,
    normalize_coords=normalize_coords,
)

# The output `predictor_sizes` is needed below to set up `SSDBoxEncoder`
model, predictor_sizes = build_model(**build_model_kwargs)

# Restore the trained weights into the freshly built architecture.
# (Alternative kept for reference: model = load_model('./trained/ssd7_0.h5'))
model.load_weights('./trained6/ssd7_0_weights.h5')
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 300, 480, 3)   0                                            
____________________________________________________________________________________________________
lambda1 (Lambda)                 (None, 300, 480, 3)   0           input_1[0][0]                    
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 150, 240, 64)  9472        lambda1[0][0]                    
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 150, 240, 64)  256         conv1[0][0]                      
___________________________________________________________________________________________

In [None]:
# Run the trained detector over every frame of the input video and write an
# annotated copy (frame number, processing time, labelled boxes) to a new file.
# Relies on `model`, `img_width` and `img_height` from the model set-up cell.

# Just so we can print class names onto the image instead of IDs
classes = ['background', 'car', 'truck', 'pedestrian', 'bicyclist', 'light']

# Context managers close both the reader and the writer even if a frame fails
# part-way through, so the output file is always finalised.
with imageio.get_reader("GP049706.MP4", "ffmpeg") as video_reader, \
        imageio.get_writer("version1.mp4", fps=25) as new_file:
    length = video_reader.get_length()
    print ("video length", length)

    # Iterate the reader directly rather than over range(length):
    # NOTE(review): some imageio versions appear to report an infinite length
    # for ffmpeg inputs, which range() cannot handle - confirm for your version.
    for i, frame in enumerate(video_reader):
        print ("frame number - ", i)

        # Scale factors from the video resolution to the network input size;
        # also used below to map detections back onto the original frame.
        fx_ = img_width/frame.shape[1]
        fy_ = img_height/frame.shape[0]

        # Resize to the exact (width, height) the model was built with.
        X = cv2.resize(frame, (img_width, img_height))

        print (X.shape)
        X = np.expand_dims(X,axis=0)  # add the batch dimension
        print (X.shape)

        # 4: Predict and decode the raw prediction `y_pred`.
        # Both stages are timed with the same wall clock so that their sum is
        # a meaningful per-frame processing time.
        start = timeit.default_timer()
        y_pred = model.predict(X)
        time1 = timeit.default_timer() - start

        start = timeit.default_timer()
        y_pred_decoded = decode_y2(y_pred,
                                  confidence_thresh=0.90,
                                  iou_threshold=0.01,
                                  top_k='all',
                                  normalize_coords=False,
                                  img_height=None,
                                  img_width=None)
        time2 = timeit.default_timer() - start

        if y_pred_decoded[0].shape[0] > 0:
            print("Decoded predictions (output format is [class_id, confidence, xmin, xmax, ymin, ymax]):\n")
            print(y_pred_decoded[0])
        else:
            print ("No detections")

        # 5: Draw the frame description and the predicted boxes onto the frame
        frame_desc = "frame number " + str(i) + " processing time " + str("%.5f" % (time1+time2))
        frame = cv2.putText(frame, frame_desc, (30,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255))

        for box in y_pred_decoded[0]:
            # hotfix for predictions outside of image: skip any box that has a
            # non-positive entry (this also skips class id 0, 'background').
            if min(box) > 0:
                object_class = classes[int(box[0])]
                pred_confidence = str("%.4f" % box[1])

                text = object_class + " " + pred_confidence

                # Map box corners from network-input pixels back to video pixels.
                x_min = int(box[2]/fx_)
                x_max = int(box[3]/fx_)

                y_min = int((box[4])/fy_)
                y_max = int((box[5])/fy_)

                print ("detection rescaled back to video", x_min, x_max, y_min, y_max)
                # Box outline, then a filled 20 px strip above it as label background.
                frame = cv2.rectangle(frame,(x_min, y_min), (x_max, y_max), (255,0,0), 2)
                frame = cv2.rectangle(frame,(x_min, y_min-20), (x_max, y_min), (255,0,0), -1)

                frame = cv2.putText(frame, text, (x_min+2,y_min-5),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255))

        print ("------------------------------ \n \n")
        new_file.append_data(frame)