In [5]:
from __future__ import division
import numpy as np
import tensorflow as tf

In [2]:
# Decode the raw SSD predictions output.


#INPUT shape:  3D Tensor (batch_size,n_boxes,n_classes +12)

#--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

# The input comes from model_fn.py, where the predictions tensor is assembled:

# < predictions=tf.concat(values=[mbox_conf_softmax, mbox_loc, mbox_priorbox],axis=0)>

# mbox_conf_softmax - predicted probabilities of each class being present in a box | len(mbox_conf_softmax)=20 (20 classes)

# mbox_loc - predicted shifts of the bboxes | len (mbox_loc)=4 (cx,cy,w,h)

# mbox_priorbox - default coordinates and sizes of default priors + variance | len(mbox_priorbox)=8 (cx,cy,w,h,var1,var1,var2,var2)

#--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+


#OUTPUT shape : 3D Tensor (batch_size,top_k,6)

In [None]:
class DecodeDetections(tf.keras.layers.Layer):
    """Keras layer that decodes raw SSD predictions into final detections.

    Input:  3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`. Along the
            last axis come the class confidences, then
            `[cx_pred, cy_pred, w_pred, h_pred]` (predicted offsets),
            `[cx_anchor, cy_anchor, w_anchor, h_anchor]` (anchor box) and
            `[var_cx, var_cy, var_w, var_h]` (variances).
    Output: 3D tensor of shape `(batch_size, top_k, 6)`. Every row is
            `[class_id, confidence, xmin, ymin, xmax, ymax]`; the second axis is
            zero-padded so that each batch item always yields `top_k` rows.

    The layer works in float32 (its constants and the per-class loop are float32),
    so `y_pred` is expected to be float32 as well.
    Class index 0 is never decoded because the class loop starts at 1
    (presumably index 0 is the background class -- confirm against the model).
    """

    def __init__(self,
                 confidence_thresh=0.01,
                 iou_threshold=0.45,
                 top_k=200,
                 nms_max_output_size=400,
                 coords='centroids',
                 normalize_coords=True,
                 img_height=None,
                 img_width=None,
                 **kwargs):
        """Store the decoding parameters.

        Args:
            confidence_thresh: Boxes whose class confidence is not above this value
                are dropped before NMS.
            iou_threshold: IoU threshold handed to `tf.image.non_max_suppression`.
            top_k: Number of boxes returned per batch item.
            nms_max_output_size: Maximum number of boxes kept per class by NMS.
            coords: Box format of the model output; only 'centroids' is implemented.
            normalize_coords: If True, the decoded corners are multiplied by the
                image size (`img_width` / `img_height`).
            img_height: Image height; required when `normalize_coords` is True.
            img_width: Image width; required when `normalize_coords` is True.
            **kwargs: Forwarded to the Keras `Layer` constructor.

        Raises:
            ValueError: If `normalize_coords` is True but the image size is missing,
                or if `coords` is not 'centroids'.
        """
        super(DecodeDetections, self).__init__(**kwargs)

        if normalize_coords and (img_height is None or img_width is None):
            raise ValueError(
                "If relative box coordinates are supposed to be converted to absolute "
                "coordinates, `img_height` and `img_width` are required, but received "
                "img_height={!r}, img_width={!r}.".format(img_height, img_width))
        if coords != 'centroids':
            raise ValueError("The DecodeDetections layer only supports coords='centroids'.")

        # We need these members for the config.
        self.confidence_thresh = confidence_thresh
        self.iou_threshold = iou_threshold
        self.top_k = top_k
        self.normalize_coords = normalize_coords
        self.img_height = img_height
        self.img_width = img_width
        self.coords = coords
        self.nms_max_output_size = nms_max_output_size

        # We need these members for TensorFlow.
        self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh')
        self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold')
        self.tf_top_k = tf.constant(self.top_k, name='top_k')
        self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords')
        # tf.constant(None, ...) raises, so the size constants only exist when a size was given.
        self.tf_img_height = (None if self.img_height is None else
                              tf.constant(self.img_height, dtype=tf.float32, name='img_height'))
        self.tf_img_width = (None if self.img_width is None else
                             tf.constant(self.img_width, dtype=tf.float32, name='img_width'))
        self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size')

    def call(self, y_pred, mask=None):
        """Decode `y_pred` and return the filtered detections.

        Args:
            y_pred: Tensor of shape `(batch_size, n_boxes, n_classes + 12)`.
            mask: Unused.

        Returns:
            Tensor of shape `(batch_size, top_k, 6)` with rows
            `[class_id, confidence, xmin, ymin, xmax, ymax]`, zero-padded along
            the second axis.
        """
        #--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
        # 1. Convert the box coordinates from predicted anchor box offsets to
        #    predicted absolute coordinates.
        #--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

        # Last 12 entries of y_pred:
        # [cx_pred, cy_pred, w_pred, h_pred, cx_anchor, cy_anchor, w_anchor, h_anchor,
        #  var_cx, var_cy, var_w, var_h]
        cx = y_pred[..., -12] * y_pred[..., -4] * y_pred[..., -6] + y_pred[..., -8]  # cx_pred * var_cx * w_anchor + cx_anchor
        cy = y_pred[..., -11] * y_pred[..., -3] * y_pred[..., -5] + y_pred[..., -7]  # cy_pred * var_cy * h_anchor + cy_anchor
        w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[..., -6]             # exp(w_pred * var_w) * w_anchor
        h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[..., -5]              # exp(h_pred * var_h) * h_anchor

        # Convert centroids to corners.
        xmin = cx - 0.5 * w
        ymin = cy - 0.5 * h
        xmax = cx + 0.5 * w
        ymax = cy + 0.5 * h

        # `normalize_coords` is fixed at construction time, so a plain Python branch is
        # enough; it also avoids building the scaling ops when no image size was given.
        if self.normalize_coords:
            xmin = xmin * self.tf_img_width
            ymin = ymin * self.tf_img_height
            xmax = xmax * self.tf_img_width
            ymax = ymax * self.tf_img_height

        # Each coordinate is (batch_size, n_boxes); add a trailing axis of length 1 so
        # the four of them can be concatenated next to the class confidences.
        xmin = tf.expand_dims(xmin, axis=-1)
        ymin = tf.expand_dims(ymin, axis=-1)
        xmax = tf.expand_dims(xmax, axis=-1)
        ymax = tf.expand_dims(ymax, axis=-1)

        # Class confidences followed by the 4 corner coordinates:
        # shape (batch_size, n_boxes, n_classes + 4).
        y_pred = tf.concat(values=[y_pred[..., :-12], xmin, ymin, xmax, ymax], axis=-1)

        #--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
        # 2. Perform confidence thresholding, per-class non-maximum suppression,
        #    and top-k filtering.
        #--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

        n_classes = tf.shape(y_pred)[2] - 4
        class_indices = tf.range(1, n_classes)  # starts at 1: class 0 is skipped

        def filter_predictions(batch_item):
            """Filter one batch item of shape (n_boxes, n_classes + 4) down to (top_k, 6)."""

            def filter_single_class(index):
                """Threshold + NMS for one class; returns (nms_max_output_size, 6)."""
                # From (n_boxes, n_classes + 4) extract (n_boxes, 1 + 1 + 4):
                # class id, confidence of that class, box corners.
                confidences = tf.expand_dims(batch_item[..., index], axis=-1)  # (n_boxes, 1)
                class_id = tf.fill(dims=tf.shape(confidences),
                                   value=tf.cast(index, tf.float32))           # (n_boxes, 1)
                box_coordinates = batch_item[..., -4:]                         # (n_boxes, 4)
                single_class = tf.concat([class_id, confidences, box_coordinates], axis=-1)  # (n_boxes, 6)

                # Apply confidence thresholding with respect to the class defined by index.
                threshold_met = single_class[:, 1] > self.tf_confidence_thresh
                single_class = tf.boolean_mask(tensor=single_class, mask=threshold_met)

                def perform_nms():
                    scores = single_class[:, 1]
                    # tf.image.non_max_suppression() needs (ymin, xmin, ymax, xmax).
                    boxes = tf.stack([single_class[:, -3],
                                      single_class[:, -4],
                                      single_class[:, -1],
                                      single_class[:, -2]], axis=-1)
                    maxima_indices = tf.image.non_max_suppression(
                        boxes=boxes,
                        scores=scores,
                        max_output_size=self.tf_nms_max_output_size,
                        iou_threshold=self.iou_threshold,
                        name='non_maximum_suppresion')
                    # Collect the boxes that survived NMS.
                    return tf.gather(params=single_class,
                                     indices=maxima_indices,
                                     axis=0)

                def no_confident_predictions():
                    # One all-zero dummy row. The per-class tensor is 2-D (rows, 6),
                    # hence shape (1, 6) and not (1, 6, 1).
                    return tf.constant(value=0.0, shape=(1, 6))

                # If any boxes made the threshold, perform NMS.
                single_class_nms = tf.cond(tf.equal(tf.size(single_class), 0),
                                           no_confident_predictions,
                                           perform_nms)

                # Pad to exactly nms_max_output_size rows so that all classes have
                # the same shape and can be stacked by tf.map_fn.
                padded_single_class = tf.pad(
                    tensor=single_class_nms,
                    paddings=[[0, self.tf_nms_max_output_size - tf.shape(single_class_nms)[0]], [0, 0]],
                    mode='CONSTANT',
                    constant_values=0.0)

                return padded_single_class

            # Iterate filter_single_class() over all class indices.
            filtered_single_classes = tf.map_fn(fn=filter_single_class,
                                                elems=class_indices,
                                                dtype=tf.float32,
                                                parallel_iterations=128,
                                                back_prop=False,
                                                swap_memory=False,
                                                infer_shape=True,
                                                name='loop_over_classes')

            # Concatenate the filtered results for all individual classes to one tensor.
            filtered_predictions = tf.reshape(tensor=filtered_single_classes, shape=(-1, 6))

            # Perform top-k filtering for this batch item or pad it in case there are
            # fewer than `self.top_k` boxes left at this point. Either way, produce a
            # tensor of length `self.top_k`, so that all batch items have the same
            # number of rows. Missing predictions are padded with zero rows.
            def top_k():
                return tf.gather(params=filtered_predictions,
                                 indices=tf.nn.top_k(filtered_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            def pad_and_top_k():
                padded_predictions = tf.pad(
                    tensor=filtered_predictions,
                    paddings=[[0, self.tf_top_k - tf.shape(filtered_predictions)[0]], [0, 0]],
                    mode='CONSTANT',
                    constant_values=0.0)
                return tf.gather(params=padded_predictions,
                                 indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            top_k_boxes = tf.cond(tf.greater_equal(tf.shape(filtered_predictions)[0], self.tf_top_k),
                                  top_k,
                                  pad_and_top_k)

            return top_k_boxes

        # Iterate filter_predictions() over all batch items.
        output_tensor = tf.map_fn(fn=filter_predictions,
                                  elems=y_pred,
                                  dtype=None,
                                  parallel_iterations=128,
                                  back_prop=False,
                                  swap_memory=False,
                                  infer_shape=True,
                                  name='loop_over_batch')

        return output_tensor

    def compute_output_shape(self, input_shape):
        """Return `(batch_size, top_k, 6)` for the given input shape."""
        batch_size = tf.TensorShape(input_shape).as_list()[0]
        return tf.TensorShape([batch_size, self.top_k, 6])

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = {
            'confidence_thresh': self.confidence_thresh,
            'iou_threshold': self.iou_threshold,
            'top_k': self.top_k,
            'nms_max_output_size': self.nms_max_output_size,
            'coords': self.coords,
            'normalize_coords': self.normalize_coords,
            'img_height': self.img_height,
            'img_width': self.img_width,
        }
        base_config = super(DecodeDetections, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
        
        

# Test shapes of Tensors

In [76]:
# Module-level image height/width used by the scratch cells below to scale the box corners.
tf_img_height, tf_img_width = 300, 300

In [77]:
# Random stand-in for the model output: 1 batch item, 100 boxes, 32 values per box.
data = np.random.random_sample((1, 100, 32))

In [78]:
data.shape

(1, 100, 32)

In [79]:
# Turn the NumPy array into a tensor; the -1 leaves the batch dimension to be inferred.
y_pred = tf.reshape(tensor=data, shape=[-1, 100, 32])

In [80]:
# Decode the predicted offsets against their anchor boxes. Per box, the last 12 columns are read as
# [cx_pred, cy_pred, w_pred, h_pred, cx_anchor, cy_anchor, w_anchor, h_anchor, var_cx, var_cy, var_w, var_h].
# Indexing with `...` keeps the leading (batch, box) axes, so each result has one entry per box.
cx = y_pred[...,-12]*y_pred[...,-4]*y_pred[...,-6]+y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor
cy = y_pred[...,-11]*y_pred[...,-3]*y_pred[...,-5]+y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor
w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor
h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor

In [81]:
cx.get_shape(),cy.get_shape(),w.get_shape(),h.get_shape()

# (1,100) 100 - n_boxes
# (1,100)
# (1,100)
# (1,100)

(TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]))

In [82]:
# Convert centroids (cx, cy, w, h) to corners by moving half the width/height away from the centre.
xmin = cx - 0.5 * w
ymin = cy - 0.5 * h
xmax = cx + 0.5 * w
ymax = cy + 0.5 * h

In [83]:
# get_shape is a method: it has to be called, otherwise the cell only shows bound-method reprs.
xmin.get_shape(),ymin.get_shape(),xmax.get_shape(),ymax.get_shape()

(TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]),
 TensorShape([Dimension(1), Dimension(100)]))

In [84]:
# (1,100) 100 - n_boxes
# (1,100)
# (1,100)
# (1,100)

def normalized_coords():
    """Scale the corner tensors by the image size and append a trailing axis of length 1.

    Reads the module-level `xmin`, `ymin`, `xmax`, `ymax`, `tf_img_width` and
    `tf_img_height`; there is no `self` outside the layer class.
    """
    # expand_dims(axis=-1) appends a trailing axis, e.g. (1, 100) -> (1, 100, 1).
    xmin1 = tf.expand_dims(xmin * tf_img_width, axis=-1)
    ymin1 = tf.expand_dims(ymin * tf_img_height, axis=-1)
    xmax1 = tf.expand_dims(xmax * tf_img_width, axis=-1)
    ymax1 = tf.expand_dims(ymax * tf_img_height, axis=-1)
    return xmin1, ymin1, xmax1, ymax1

def non_normalized_coords():
    """Return the module-level corner tensors unscaled, each with a trailing axis of length 1."""
    # expand_dims(axis=-1) appends a trailing axis, e.g. (1, 100) -> (1, 100, 1).
    return (tf.expand_dims(xmin, axis=-1),
            tf.expand_dims(ymin, axis=-1),
            tf.expand_dims(xmax, axis=-1),
            tf.expand_dims(ymax, axis=-1))

In [85]:
def normalized_coords():
    """Scale the corner tensors by the image size and give each a trailing axis of length 1."""
    scaled_corners = (xmin * tf_img_width,
                      ymin * tf_img_height,
                      xmax * tf_img_width,
                      ymax * tf_img_height)
    # Returned in the order (xmin, ymin, xmax, ymax).
    return tuple(tf.expand_dims(corner, axis=-1) for corner in scaled_corners)

def non_normalized_coords():
    """Give each corner tensor a trailing axis of length 1 without scaling it."""
    # Returned in the order (xmin, ymin, xmax, ymax).
    return tuple(tf.expand_dims(corner, axis=-1) for corner in (xmin, ymin, xmax, ymax))

In [86]:
xmin1, ymin1, xmax1, ymax1=normalized_coords()

In [87]:
xmin1.get_shape(), ymin1.get_shape(), xmax1.get_shape(), ymax1.get_shape()

(TensorShape([Dimension(1), Dimension(100), Dimension(1)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(1)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(1)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(1)]))

In [88]:
# (1,100,1)
# (1,100,1)
# (1,100,1)
# (1,100,1)

In [89]:
# Pick the conversion branch with a constant-True predicate; the branches are passed as callables.
# NOTE: this rebinds xmin/ymin/xmax/ymax, which the branch functions read, so re-running
# this cell would feed the already converted tensors through the conversion again.
xmin, ymin, xmax, ymax = tf.cond(tf.constant(True, name='normalize_coords'), normalized_coords, non_normalized_coords)

In [90]:
# (1,100,1)
# (1,100,1)
# (1,100,1)
# (1,100,1)

In [91]:
# Replace the trailing 12 columns with the 4 corner columns, keeping everything before them.
# NOTE: y_pred is rebound here, so re-running this cell alone would strip 12 more columns.
y_pred = tf.concat(values=[y_pred[...,:-12], xmin, ymin, xmax, ymax], axis=-1)

In [92]:
y_pred

<tf.Tensor 'concat_4:0' shape=(1, 100, 24) dtype=float64>

In [93]:
# (1,100,24) 100 boxes | 24 = 20 classes + 4 coords 

In [126]:
# Assemble [class_id, confidence, xmin, ymin, xmax, ymax] for class index 0,
# working on the whole (still batched) prediction tensor.
batch_item = y_pred
box_coordinates = batch_item[..., -4:]                     # shape (batch_size, n_boxes, 4)
confidences = tf.expand_dims(batch_item[..., 0], axis=-1)  # shape (batch_size, n_boxes, 1)
class_id = tf.fill(dims=tf.shape(confidences),
                   value=tf.cast(0, tf.float64))            # shape (batch_size, n_boxes, 1)

single_class = tf.concat(values=[class_id, confidences, box_coordinates],
                         axis=-1)                           # shape (batch_size, n_boxes, 6)

In [127]:
confidences.get_shape(),class_id.get_shape(),box_coordinates.get_shape(),single_class.get_shape()

(TensorShape([Dimension(1), Dimension(100), Dimension(1)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(1)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(4)]),
 TensorShape([Dimension(1), Dimension(100), Dimension(6)]))

In [128]:
# (1,100,1)
# (1,100,1)
# (1,100,4)
# (1,100,6)

In [129]:
threshold_met=single_class[:,1] > 0.01

In [130]:
threshold_met.get_shape()

TensorShape([Dimension(1), Dimension(6)])

In [125]:
single_class=tf.boolean_mask(tensor=single_class,mask=threshold_met)

ValueError: Shapes (1, 100) and (1, 6) are incompatible

In [131]:
single_class[:,1].get_shape()

TensorShape([Dimension(1), Dimension(6)])