In [2]:
import tensorflow as tf
import numpy as np

# SSD Optimizer

In [3]:
num_vars = 7
num_classes = 3
num_anchors = 5
batch_size = 3

sess = tf.Session()

def tf_print(tensor):
    print(sess.run(tensor))

## Ground truth

In [55]:
# Classificatin tensor
# shape: (batch_size, num_anchors, num_classes)
# create random label input
gt_cl = []
for i in range(batch_size):
    x = np.eye(num_classes)
    # select row at random choice
    gt_cl.append(x[np.random.choice(x.shape[0], size=num_anchors)].tolist())
gt_cl = tf.constant(gt_cl)


# Localization tensor
# shape: (batch_size, num_anchors, 4) 
gt_loc = tf.random_normal(shape=[batch_size, num_anchors, 4])

# Senity check
tf_print(gt_cl)
tf_print(gt_loc)

[[[0. 0. 1.]
  [1. 0. 0.]
  [1. 0. 0.]
  [0. 1. 0.]
  [1. 0. 0.]]

 [[1. 0. 0.]
  [0. 0. 1.]
  [0. 0. 1.]
  [0. 0. 1.]
  [0. 0. 1.]]

 [[0. 0. 1.]
  [0. 0. 1.]
  [0. 1. 0.]
  [0. 0. 1.]
  [1. 0. 0.]]]
[[[ 1.9740728   0.00531998  0.05073944  1.9181988 ]
  [ 0.9836687   0.70806366  0.47138602 -0.11364946]
  [-0.7118693  -0.9339045   0.21746619  1.0575892 ]
  [-0.26124918 -0.02741667 -1.823249    0.07471395]
  [-1.289948    1.1268674   1.3789265  -0.07503923]]

 [[ 1.65405     2.0445502  -1.3374531  -0.706513  ]
  [-0.24702965  1.5699501   2.0597308  -0.26911816]
  [-0.37924895 -1.8115596  -0.7363666   1.2832676 ]
  [-0.89154685  1.0267923   0.44763702 -0.05752175]
  [-1.6820769  -0.88885796 -0.07986308  0.46741524]]

 [[-1.7251089   1.1858717  -2.1313174   3.0536988 ]
  [ 0.3876682   0.818881    0.3098628   1.6061039 ]
  [-0.7169859   0.2832111  -0.21452603  1.2144817 ]
  [-0.33307076 -0.4277424  -1.0602584   0.8980847 ]
  [ 1.0648307  -1.0960479   1.1318891  -0.932937  ]]]


## match_counters

In [56]:
# Number of anchors per sample
# Shape: (batch_size) 
total_num = tf.ones([batch_size], dtype=tf.int64) * tf.to_int64(num_anchors)

# Number of negative (not-matched) anchors per sample, computed by  
# counting boxes of the background class in each sample.            
# Shape: (batch_size) 
negatives_num = tf.count_nonzero(gt_cl[:, :, -1], axis=1)

# Number of positive (matched) anchors per sample                   
# Shape: (batch_size)
positives_num = total_num - negatives_num

# senity check
print("Number of anchors per batch")
print("Total: \n", sess.run(total_num))
print("Negative (bg=1): \n", sess.run(negatives_num))
print("Positive (bg!=1): \n",sess.run(positives_num))

Number of anchors per batch
Total: 
 [5 5 5]
Negative (bg=1): 
 [1 4 3]
Positive (bg!=1): 
 [4 1 2]


In [73]:
# Number of positives per sample that is division-safe              
# Shape: (batch_size)                                               
positives_num_safe = tf.where(tf.equal(positives_num, 0),           
                              tf.ones([batch_size])*10e-15,         
                              tf.to_float(positives_num))
tf_print(positives_num_safe)

[4. 1. 2.]


## match_masks

In [57]:
# Boolean tensor determining whether an anchor is a positive        
# Shape: (batch_size, num_anchors)  
positives_mask = tf.equal(gt_cl[:, :, -1], 0)
print("Positive mask:")
print(sess.run(positives_mask))

# Boolean tensor determining whether an anchor is a negative        
negatives_mask = tf.logical_not(positives_mask)
print("Negative mask:")
tf_print(negatives_mask)

Positive mask:
[[False  True  True  True  True]
 [ True False False False False]
 [False False  True False  True]]
Negative mask:
[[ True False False False False]
 [False  True  True  True  True]
 [ True  True False  True False]]


## confidence_loss

In [58]:
# Cross-entorpy tensor  (gt_cl => logits)                                        
# Shape: (batch_size, num_anchors) 
conf = tf.constant([[0.39282098, 0.4600574,  0.01153591, 0.14568441, 0.85366163],
                    [0.33491521, 0.18863397, 0.18936157, 0.18933312, 0.09754541],
                    [0.71655552, 0.89165631, 0.28296348, 0.45780019, 0.41719255]])
tf_print(conf)

[[0.39282098 0.4600574  0.01153591 0.1456844  0.85366166]
 [0.33491522 0.18863396 0.18936157 0.18933313 0.09754541]
 [0.71655554 0.89165634 0.28296348 0.45780018 0.41719255]]


In [59]:
# Sum up the loss of all the positive anchors                       
# Positives - the loss of neg anchors is zeroed out                 
# Shape: (batch_size, num_anchors) 
positives_conf = tf.where(positives_mask, conf, tf.zeros_like(conf))
tf_print(positives_conf)

[[0.         0.4600574  0.01153591 0.1456844  0.85366166]
 [0.33491522 0.         0.         0.         0.        ]
 [0.         0.         0.28296348 0.         0.41719255]]


In [60]:
# Total loss of positive anchors                                    
# Shape: (batch_size)                                               
positives_sum = tf.reduce_sum(positives_conf, axis=-1)
tf_print(positives_sum)

[1.4709394  0.33491522 0.70015603]


In [61]:
# Find neg anchors with highest conf loss                           
# Negatives - the loss of positive anchor is zeroed out             
# Shape: (batch_size, num_anchors)                                  
negatives_conf = tf.where(negatives_mask, conf, tf.zeros_like(conf))
tf_print(negatives_conf)

[[0.39282098 0.         0.         0.         0.        ]
 [0.         0.18863396 0.18936157 0.18933313 0.09754541]
 [0.71655554 0.89165634 0.         0.45780018 0.        ]]


In [62]:
# Top neg - sorted conf loss with highest one first                 
# Shape: (batch_size, num_anchors)
negatives_top = tf.nn.top_k(negatives_conf, num_anchors)[0]
tf_print(negatives_top)

[[0.39282098 0.         0.         0.         0.        ]
 [0.18936157 0.18933313 0.18863396 0.09754541 0.        ]
 [0.89165634 0.71655554 0.45780018 0.         0.        ]]


In [63]:
# Find num of negs we want to keep are                              
# Max num of negs to keep per sample - keep 3 time as many as pos   
# anchors in the sample                                             
# Shape: (batch_size) 
negatives_num_max = tf.minimum(negatives_num, 3*positives_num)
tf_print(negatives_num_max)

[1 3 3]


In [64]:
# mask out superfluous negs and compute the sum of the loss         
# Transposed vector of maximum negs per sample                      
# Shape: (batch_size, 1)                                            
negatives_num_max_t = tf.expand_dims(negatives_num_max, 1)
tf_print(negatives_num_max_t)

[[1]
 [3]
 [3]]


In [65]:
# Range tensor: [0, 1, 2, ..., num_anchors-1]                       
# Shape: (num_anchors)                                              
rng = tf.range(0, num_anchors, 1)
tf_print(rng)

[0 1 2 3 4]


In [66]:
# Row range, int64, row of a matrix                                 
# shape: (1, num_anchors)                                           
range_row = tf.to_int64(tf.expand_dims(rng, 0)) 
tf_print(range_row)

[[0 1 2 3 4]]


In [67]:
# Mask of maximum negatives - first `negative_num_max` elements     
# in corresponding row are `True`, the rest is false                
# Shape: (batch_size, num_anchors)
negatives_max_mask = tf.less(range_row, negatives_num_max_t)
tf_print(negatives_max_mask)

[[ True False False False False]
 [ True  True  True False False]
 [ True  True  True False False]]


In [70]:
# Max negatives - all the positives and superfluous negatives are zeroed out.                                                       
# Shape: (batch_size, num_anchors)                     
negatives_max = tf.where(negatives_max_mask, negatives_top, tf.zeros_like(negatives_top))
tf_print(negative_max)

[[0.39282098 0.         0.         0.         0.        ]
 [0.18936157 0.18933313 0.18863396 0.         0.        ]
 [0.89165634 0.71655554 0.45780018 0.         0.        ]]


In [71]:
# Sum of max negatives for each sample                              
# Shape: (batch_size)                                               
negatives_max_sum = tf.reduce_sum(negatives_max, axis=-1) 
tf_print(negatives_max_sum)

[0.39282098 0.5673287  2.0660121 ]


## Compute the confidence loss for each element 

In [72]:
# Total confidence loss for each sample                             
# Shape: (batch_size) 
confidence_loss = tf.add(positives_sum, negatives_max_sum)
tf_print(confidence_loss)

[1.8637604 0.9022439 2.766168 ]


In [74]:
# Mean confidence loss for the batch                                
# Shape: scalar                                                     
batch_confidence_loss = tf.reduce_mean(confidence_loss, name='confidence_loss')
tf_print(batch_confidence_loss)

1.8440574
