In [1]:
from keras.applications import MobileNetV2
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda, \
    regularizers, MaxPooling2D, Concatenate, Add
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.engine.topology import Layer
import keras.backend as K
import tensorflow as tf
from keras.regularizers import l2
from functools import reduce
import operator
import numpy as np

# Switch TensorFlow to eager mode so the ops in the cells below evaluate
# immediately and can be displayed as concrete values.
tf.enable_eager_execution()

Using TensorFlow backend.


In [2]:
# Anchor boxes written as (width, height) pairs, then flattened into the
# flat list layout the later cells expect.
_anchor_pairs = [
    (12, 30), (14, 64), (25, 93),
    (29, 41), (43, 76), (44, 143),
    (77, 92), (79, 182), (246, 277),
]
anchors = [side for pair in _anchor_pairs for side in pair]

# Largest supported network input, in pixels.
max_grid = [608] * 2
batch_size = 16
warmup_batches = 0
# IoU threshold compared against the best overlap of each predicted box.
ignore_thresh = 0.5

# Relative weights of the individual loss terms.
yolo_loss_options = dict(
    grid_scale=1,
    obj_scale=1,
    noobj_scale=1,
    xywh_scale=1,
    class_scale=1,
)

In [3]:
# Keep only the first three anchors: 3 anchors x (width, height) = 6 values.
anchors = anchors[:3 * 2]
# Integer-divide each side of the maximum input size by 32.
# NOTE: `max_grid` is rebound from its own previous value, so running this
# cell a second time divides by 32 again.
max_grid = [side // 32 for side in max_grid]

In [4]:
# Turn the flat anchor list into a broadcastable (1, 1, 1, 3, 2) tensor:
# three anchors, each a (width, height) pair. This rebinds `anchors` from a
# Python list to a tensor.
anchors = tf.constant(anchors, dtype=K.floatx(), shape=[1, 1, 1, 3, 2])

# Unpack the individual loss weights from the options dict.
grid_scale = yolo_loss_options['grid_scale']
obj_scale = yolo_loss_options['obj_scale']
noobj_scale = yolo_loss_options['noobj_scale']
xywh_scale = yolo_loss_options['xywh_scale']
class_scale = yolo_loss_options['class_scale']

# make a persistent mesh grid
max_grid_h, max_grid_w = max_grid

# With the default 'xy' indexing both outputs have shape (max_grid_h, max_grid_w):
# grid_x[i, j] == j (column index) and grid_y[i, j] == i (row index).
grid_x, grid_y = tf.meshgrid(tf.range(max_grid_w), tf.range(max_grid_h))
# tf.cast(..., tf.float32) is the non-deprecated equivalent of tf.to_float.
cell_x = tf.reshape(tf.cast(grid_x, tf.float32), (1, max_grid_h, max_grid_w, 1, 1))
cell_y = tf.reshape(tf.cast(grid_y, tf.float32), (1, max_grid_h, max_grid_w, 1, 1))
# (batch_size, max_grid_h, max_grid_w, 3, 2): the (x, y) index of every cell,
# repeated for each of the 3 anchors and for every sample in the batch.
cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [batch_size, 1, 1, 3, 1])

In [5]:
# Placeholder float32 arrays standing in for one batch of inputs.
input_image = np.ones(shape=(batch_size, 608, 608, 3), dtype="float32")
# Raw network output: 24 channels per grid cell (split into 3 x 8 in a later cell).
y_pred = np.zeros(shape=(batch_size, 19, 19, 24), dtype="float32")
# Targets: 8 values for each of the 3 anchors of every grid cell.
y_true = np.zeros(shape=(batch_size, 19, 19, 3, 8), dtype="float32")
# Up to 200 boxes of 4 values per sample, with unit axes for broadcasting.
true_boxes = np.zeros(shape=(batch_size, 1, 1, 1, 200, 4), dtype="float32")

In [6]:
# Split the channel axis into (3 anchors, values per anchor):
# [batch, grid_h, grid_w, 3 * k] -> [batch, grid_h, grid_w, 3, k]
batch_and_grid_dims = tf.shape(y_pred)[:3]
per_anchor_shape = tf.concat([batch_and_grid_dims, tf.constant([3, -1])], axis=0)
y_pred = tf.reshape(y_pred, per_anchor_shape)
y_pred.shape

TensorShape([Dimension(16), Dimension(19), Dimension(19), Dimension(3), Dimension(8)])

In [7]:
# Channel 4 of y_true, with a trailing unit axis so it broadcasts against
# the per-anchor value tensors.
object_mask = tf.expand_dims(y_true[..., 4], axis=-1)
object_mask.shape

TensorShape([Dimension(16), Dimension(19), Dimension(19), Dimension(3), Dimension(1)])

In [8]:
# Grid size (from the targets) and network input size (from the image batch),
# each packed as a broadcastable (width, height) pair of shape (1, 1, 1, 1, 2).
y_true_shape = tf.shape(y_true)
grid_h, grid_w = y_true_shape[1], y_true_shape[2]
grid_factor = tf.reshape(tf.cast(tf.stack([grid_w, grid_h]), K.floatx()), [1, 1, 1, 1, 2])

input_shape = tf.shape(input_image)
net_h, net_w = input_shape[1], input_shape[2]
net_factor = tf.reshape(tf.cast(tf.stack([net_w, net_h]), K.floatx()), [1, 1, 1, 1, 2])

grid_factor, net_factor

(<tf.Tensor: id=54, shape=(1, 1, 1, 1, 2), dtype=float32, numpy=array([[[[[19., 19.]]]]], dtype=float32)>,
 <tf.Tensor: id=70, shape=(1, 1, 1, 1, 2), dtype=float32, numpy=array([[[[[608., 608.]]]]], dtype=float32)>)

In [9]:
"""
Adjust prediction
"""
# Box centre: sigmoid of the first two channels plus the index of the cell.
pred_xy_logits = y_pred[..., :2]
pred_box_xy = cell_grid + tf.sigmoid(pred_xy_logits)
# Channels 2-3 are kept raw; later cells exponentiate them and scale by the anchors.
pred_box_wh = y_pred[..., 2:4]
# Channel 4: confidence, squashed by sigmoid, with a trailing unit axis.
pred_conf_logits = y_pred[..., 4]
pred_box_conf = tf.expand_dims(tf.sigmoid(pred_conf_logits), axis=-1)
# Remaining channels: per-class logits.
pred_box_class = y_pred[..., 5:]

In [10]:
# Slice the targets the same way as the predictions.
true_box_xy = y_true[..., :2]      # same encoding as pred_box_xy
true_box_wh = y_true[..., 2:4]     # same encoding as pred_box_wh
true_box_conf = tf.expand_dims(y_true[..., 4], axis=-1)
# Index of the largest class entry, used as the sparse class label.
true_box_class = tf.argmax(y_true[..., 5:], axis=-1)

In [11]:
# Confidence error against a target of 0; later cells mask this term by IoU
# and combine it with the object term.
conf_delta = pred_box_conf - 0

In [12]:
# then, ignore the boxes which have good overlap with some true box
# Box centres are divided by the grid size, box sizes by the network input size.
true_xy = tf.truediv(true_boxes[..., :2], grid_factor)
true_wh = tf.truediv(true_boxes[..., 2:4], net_factor)

In [13]:
# Corner coordinates of the true boxes: centre -/+ half the size.
true_wh_half = true_wh / 2.0
true_mins, true_maxes = true_xy - true_wh_half, true_xy + true_wh_half

In [15]:
# Normalise the predictions the same way as the true boxes.
pred_xy_norm = pred_box_xy / grid_factor
pred_wh_norm = tf.exp(pred_box_wh) * anchors / net_factor
# Insert a unit axis at position 4 so each prediction broadcasts against the
# axis that enumerates the true boxes.
pred_xy = tf.expand_dims(pred_xy_norm, axis=4)
pred_wh = tf.expand_dims(pred_wh_norm, axis=4)

In [16]:
# Corner coordinates of the predicted boxes: centre -/+ half the size.
pred_wh_half = pred_wh / 2.0
pred_mins, pred_maxes = pred_xy - pred_wh_half, pred_xy + pred_wh_half

In [18]:
# Overlap rectangle between every predicted box and every true box.
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
# Negative extents mean no overlap, so clamp them to zero.
intersect_extent = intersect_maxes - intersect_mins
intersect_wh = tf.maximum(intersect_extent, 0.)
# Area = width * height, i.e. the product over the last (size-2) axis.
intersect_areas = tf.reduce_prod(intersect_wh, axis=-1)

In [19]:
# Box areas = width * height, i.e. the product over the last (size-2) axis.
true_areas = tf.reduce_prod(true_wh, axis=-1)
pred_areas = tf.reduce_prod(pred_wh, axis=-1)

In [20]:
# Intersection-over-union of every predicted box with every true box.
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = intersect_areas / union_areas

In [22]:
# Best overlap of each predicted box with any true box (axis 4 enumerates
# the true boxes).
best_ious = tf.reduce_max(iou_scores, axis=4)
# Keep the confidence error only where the best overlap is below the
# threshold; boxes at or above the threshold are zeroed out.
# tf.cast replaces the deprecated tf.to_float and matches conf_delta's dtype.
low_iou_mask = tf.cast(best_ious < ignore_thresh, conf_delta.dtype)
conf_delta *= tf.expand_dims(low_iou_mask, 4)

In [35]:
# The coordinate loss uses the object mask as its mask. The original tuple
# assignment also rebound true_box_xy / true_box_wh to themselves, which had
# no effect, so only the meaningful binding is kept.
xywh_mask = object_mask

In [33]:
# True box size as a fraction of the network input.
true_wh_norm = tf.exp(true_box_wh) * anchors / net_factor
# Weight = 2 - (normalised width * height), so it grows as the box area
# shrinks; trailing unit axis added for broadcasting.
true_area_norm = true_wh_norm[..., 0] * true_wh_norm[..., 1]
wh_scale = tf.expand_dims(2 - true_area_norm, axis=4)

In [38]:
# Coordinate errors, masked by xywh_mask and weighted by wh_scale.
xy_delta = xywh_mask * (pred_box_xy - true_box_xy) * wh_scale * xywh_scale
wh_delta = xywh_mask * (pred_box_wh - true_box_wh) * wh_scale * xywh_scale

# Rebuild the no-object confidence error from its inputs instead of reading
# the previous value of `conf_delta`. The old form fed `conf_delta` back into
# itself, so re-running this cell compounded `noobj_scale` whenever it was
# not 1. The first-run result is unchanged.
noobj_keep_mask = tf.expand_dims(tf.cast(best_ious < ignore_thresh, pred_box_conf.dtype), 4)
noobj_conf_delta = pred_box_conf * noobj_keep_mask
conf_delta = (object_mask * (pred_box_conf - true_box_conf) * obj_scale
              + (1 - object_mask) * noobj_conf_delta * noobj_scale)

# Classification error: softmax cross-entropy against the sparse class label,
# masked by the object mask.
class_delta = object_mask * \
              tf.expand_dims(
                  tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class),
                  4) * \
              class_scale

In [41]:
# Average every term over all axes except the batch axis, giving one value
# per sample.
per_sample_axes = [1, 2, 3, 4]
loss_xy = tf.reduce_mean(tf.square(xy_delta), axis=per_sample_axes)
loss_wh = tf.reduce_mean(tf.square(wh_delta), axis=per_sample_axes)
loss_conf = tf.reduce_mean(tf.square(conf_delta), axis=per_sample_axes)
# class_delta is already a cross-entropy, so it is averaged without squaring.
loss_class = tf.reduce_mean(class_delta, axis=per_sample_axes)

# Total per-sample loss, displayed as the cell output.
loss_xy + loss_wh + loss_conf + loss_class

<tf.Tensor: id=281, shape=(16,), dtype=float32, numpy=
array([0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
       0.25, 0.25, 0.25, 0.25, 0.25], dtype=float32)>

In [None]:
# Scratch check: complement of the object mask, i.e. the weight applied to
# the no-object confidence term when conf_delta is assembled.
1 - object_mask