In [1]:
import keras
import os
import numpy as np
import keras.backend as K
import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = ""

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [636]:
def bbox_overlap_iou(bboxes1, bboxes2):
    """
    Args:
        bboxes1: shape (total_bboxes1, 4)
            with x1, y1, x2, y2 point order.
        bboxes2: shape (total_bboxes2, 4)
            with x1, y1, x2, y2 point order.
        p1 *-----
           |     |
           |_____* p2
    Returns:
        Tensor with shape (total_bboxes1, total_bboxes2)
        with the IoU (intersection over union) of bboxes1[i] and bboxes2[j]
        in [i, j].
    """

    x11, y11, x12, y12 = K.tf.split(bboxes1, 4, axis=2)
    x21, y21, x22, y22 = K.tf.split(bboxes2, 4, axis=2)
    x11 = tf.transpose(x11, (1, 2, 0))
    y11 = tf.transpose(y11, (1, 2, 0))
    x12 = tf.transpose(x12, (1, 2, 0))
    y12 = tf.transpose(y12, (1, 2, 0))

    x21 = tf.transpose(x21, (1, 2, 0))
    y21 = tf.transpose(y21, (1, 2, 0))
    x22 = tf.transpose(x22, (1, 2, 0))
    y22 = tf.transpose(y22, (1, 2, 0))

    xI1 = K.maximum(x11, K.transpose(x21))
    yI1 = K.maximum(y11, K.transpose(y21))

    xI2 = K.minimum(x12, K.transpose(x22))
    yI2 = K.minimum(y12, K.transpose(y22))

    inter_area = (xI2 - xI1 + 1) * (yI2 - yI1 + 1)

    bboxes1_area = (x12 - x11 + 1) * (y12 - y11 + 1)
    bboxes2_area = (x22 - x21 + 1) * (y22 - y21 + 1)

    union = (bboxes1_area + K.transpose(bboxes2_area)) - inter_area

    return tf.transpose(K.maximum(inter_area / union, 0), (1, 0, 2))


def bbox_iog(predicted, ground_truth):
    x11, y11, x12, y12 = K.tf.split(predicted, 4, axis=2)
    x21, y21, x22, y22 = K.tf.split(ground_truth, 4, axis=2)

    xI1 = K.maximum(x11, K.transpose(x21))
    yI1 = K.maximum(y11, K.transpose(y21))

    xI2 = K.minimum(x12, K.transpose(x22))
    yI2 = K.minimum(y12, K.transpose(y22))

    intersect_area = (xI2 - xI1 + 1) * (yI2 - yI1 + 1)

    gt_area = (x22 - x21 + 1) * (y22 - y21 + 1)

    return K.maximum(intersect_area / gt_area, 0)


def smooth_l1_distance(y_true, y_pred, delta=0.5):
    sigma_squared = delta ** 2

    # compute smooth L1 loss
    # f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
    #        |x| - 0.5 / sigma / sigma    otherwise
    regression_diff = y_pred - y_true
    regression_diff = K.abs(regression_diff)
    return K.tf.where(
        K.less(regression_diff, 1.0 / sigma_squared),
        0.5 * sigma_squared * K.pow(regression_diff, 2),
        regression_diff - 0.5 / sigma_squared
    )


def smooth_ln(x, delta):
    cond = K.less_equal(x, delta)
    true_fn = -K.log(1 - x)
    false_fn = ((x - delta) / (1 - delta)) - K.log(1 - delta)
    return where(cond, true_fn, false_fn)


def attraction_term(y_true, y_pred, highest_iou):
    # Найти из y_true бокс с большим IOU для всех y_pred
    # Прогоняем его через smooth_l1
    # Суммиируем
    # Делим на количество y_pred
    
    len_of_batch = K.tf.cast(tf.shape(y_true)[0], dtype=K.tf.int64)
    len_of_true = K.tf.cast(tf.shape(y_true)[1], dtype=K.tf.int64)
    
    iou_max_indices = K.tf.argmax(highest_iou[..., 0], axis=2)
    pred_boxes_with_max_ious = None
    
    for batch_num in np.arange(y_true.shape[0], dtype=np.int64):
        indices = K.stack([K.tf.cast(K.tf.tile([batch_num], [y_true.shape[1]]), dtype=K.tf.int64),
                           K.tf.range(len_of_true), 
                           iou_max_indices[batch_num]])
        indices = K.transpose(indices)
        
        if pred_boxes_with_max_ious is None: 
            pred_boxes_with_max_ious = K.tf.gather_nd(highest_iou, indices)
        else:
            pred_boxes_with_max_ious = K.tf.concat([pred_boxes_with_max_ious,
                                                    K.tf.gather_nd(highest_iou, indices)], axis=0)
    return K.sum(smooth_l1_distance(y_true, pred_boxes_with_max_ious)) / K.cast(len_of_true, K.floatx())


def repulsion_term_gt(y_true, y_pred, iou_over_predicted, alpha):
    # Найти из y_true бокс с вторым по величине IOU
    # Находим IoG между этим боксом и y_true
    # Прогоняем IoG через smooth_ln
    # Суммиируем
    # Делим на количество y_pred
    
    _, indices_2highest_iou = K.tf.nn.top_k(iou_over_predicted, k=2)
    indices_2highest_iou = indices_2highest_iou[:, 1]
    gt_2highest_iou = K.map_fn(lambda i: K.tf.gather_nd(y_true, [i]), indices_2highest_iou, dtype=K.floatx())
    iog = K.map_fn(lambda x: bbox_iog([x[0]], [x[1]]), (y_pred, gt_2highest_iou), dtype=K.floatx())
    iog = K.squeeze(iog, axis=2)
    return K.sum(smooth_ln(iog, alpha)) / K.cast(K.shape(y_pred)[0], K.floatx())


def repulsion_term_box(y_true, y_pred, betta):
    # Делим все множество y_pred боксов на бокс + цель (Проходимся циклом и оставляем для каждой y_true бокс из y_pred с наибольшим IoU)
    # Находим IoU для каждой пары сочетания (Bi, Bj)
    # Для каждой пары находим отношение smooth_ln(IoU) / IoU + e
    # Суммиируем
    return K.variable(0.0, dtype=K.floatx())


def repulsion_loss(y_true, y_pred):
    # Фильтруем y_pred, оставляя те, у которых IOU > 0,5 хотябы с одним y_true

    y_pred = y_pred[..., :4]
    y_true = y_true[..., :4]
    
    len_of_gt = K.tf.shape(y_true)[1]
    len_of_pred = K.tf.shape(y_pred)[1]
    
    iou_over_gt = bbox_overlap_iou(y_true, y_pred)
    
    tiled_for_concat = K.tf.tile(K.tf.expand_dims(y_pred, axis=1), [1, len_of_gt, 1, 1])
    highest_iou = K.tf.concat([K.tf.expand_dims(iou_over_gt, axis=3), tiled_for_concat], axis=3)
    highest_iou = highest_iou[..., 1:]
    
    alpha = 0.5
    betta = 0.5

    return K.sum([
        attraction_term(y_true, y_pred, highest_iou),
        # repulsion_term_gt(y_true, y_pred_masked, iou_over_predicted, alpha),
        # repulsion_term_box(y_true, y_pred_masked, betta)
                 ])

In [534]:
def rand_gen(m):
    count = 0
    while count < 1000000:
        yield np.random.randint(0, m)
        count += 1
        
a, b = rand_gen(1280), rand_gen(720)

image_w, image_h = 1280, 720
y_true = np.array([[23, 41, 58, 60, 1], 
                   [322, 33, 450, 99, 1],
                   [159, 600, 261, 692, 1],
                   [234, 4, 249, 19, 1], 
                   [1000, 651, 1101, 718, 1], 
                   [76, 98, 257, 152, 1]], dtype=K.floatx())

y_pred = np.array([[i, j, k, l, np.random.sample()] for i, j, k, l in zip(a, b, a, b) if i < k and j < l], 
                  dtype=K.floatx())

y_true[:, 0] /= 1280
y_true[:, 1] /= 720
y_true[:, 2] /= 1280
y_true[:, 3] /= 720

y_pred[:, 0] /= 1280
y_pred[:, 1] /= 720
y_pred[:, 2] /= 1280
y_pred[:, 3] /= 720

print('y_true len: ', len(y_true))
print('y_pred len: ', len(y_pred))

y_true len:  6
y_pred len:  124867


In [637]:
loss = repulsion_loss(np.expand_dims(y_true, axis=0), 
                      np.expand_dims(y_pred, axis=0))
%time print('repulsion_loss: ', K.eval(loss))

repulsion_loss:  0.19861376
CPU times: user 787 ms, sys: 12.9 ms, total: 800 ms
Wall time: 447 ms


In [83]:
y_pred = y_pred[:, :4]
y_true = y_true[:, :4]

iou_over_predicted = bbox_overlap_iou(y_pred, y_true)
highest_iou = K.max(iou_over_predicted, axis=1)
iou_gt_05 = where(K.greater(highest_iou, 0.5))
y_pred_masked = K.tf.gather_nd(y_pred, iou_gt_05)
iou_over_predicted = K.tf.gather_nd(iou_over_predicted, iou_gt_05)

iou_over_true = bbox_overlap_iou(y_true, y_pred_masked)
highest_iou = K.argmax(iou_over_true, axis=1)
y_pred_masked = tf.map_fn(lambda x: tf.gather_nd(y_pred, [x]), highest_iou, dtype=K.floatx())

print(K.eval(bbox_overlap_iou(y_pred_masked, y_true)))

[[0.23452422 0.42505637 0.2552724  0.29780933 0.27136403 0.35316977]
 [0.6356587  0.61754245 0.27363575 0.6100713  0.08489684 0.7794309 ]
 [0.34234455 0.44145104 0.27544335 0.3565427  0.2302841  0.46962705]
 [0.13993059 0.23211455 0.48920596 0.15344568 0.4017849  0.2193756 ]
 [0.4293052  0.7077038  0.2072057  0.5115235  0.09686629 0.62740034]
 [0.22542173 0.3891461  0.3039077  0.2775077  0.33282655 0.33126658]]


In [68]:
y_true

array([[0.01796875, 0.05694444, 0.0453125 , 0.08333334],
       [0.2515625 , 0.04583333, 0.3515625 , 0.1375    ],
       [0.12421875, 0.8333333 , 0.20390625, 0.9611111 ],
       [0.1828125 , 0.00555556, 0.19453125, 0.02638889],
       [0.78125   , 0.90416664, 0.86015624, 0.99722224],
       [0.059375  , 0.13611111, 0.20078126, 0.21111111]], dtype=float32)

In [72]:
K.eval(bbox_overlap_iou([y_true[0]], [a[0]]))

ValueError: Tried to convert 'value' to a tensor and failed. Error: Argument must be a dense tensor: [array([0.01796875, 0.05694444, 0.0453125 , 0.08333334], dtype=float32)] - got shape [1, 4], but wanted [1].

In [320]:
iou_over_predicted = K.tf.map_fn(lambda predicted: bboxes_iou(predicted, y_true),
                                 y_pred,
                                 dtype=K.floatx(),
                                 parallel_iterations=10000,
                                 swap_memory=True)
K.eval(iou_over_predicted)

highest_iou = K.max(iou_over_predicted, axis=1)

iou_gt_05 = where(K.greater(highest_iou, 0.1))

y_pred = tf.gather_nd(y_pred, iou_gt_05)

iou_over_predicted = tf.gather_nd(iou_over_predicted, iou_gt_05)


indices_highest_iou = K.argmax(iou_over_predicted, axis=1)

gt_highest_iou = K.tf.map_fn(lambda i: K.tf.gather_nd(y_true, [i]), indices_highest_iou, dtype=K.floatx(),
                             parallel_iterations=10000, swap_memory=True)

indices_and_predictions = K.stack([y_pred, gt_highest_iou], axis=1)

smooth_l1_dist = K.tf.map_fn(lambda x: smooth_l1_distance(x[1], x[0]), indices_and_predictions,
                             parallel_iterations=10000, swap_memory=True)

s = K.sum(smooth_l1_dist) / K.cast(K.shape(y_pred)[0], K.floatx())


CPU times: user 7.06 s, sys: 334 ms, total: 7.39 s
Wall time: 1.62 s


In [353]:
K.eval(K.tf.gather_nd(y_true, K.stack([tf.zeros_like(indices_highest_iou), indices_highest_iou], axis=1)))

InvalidArgumentError: flat indices[559, :] = [0, 4] does not index into param (shape: [6,4]).
	 [[Node: GatherNd_79 = GatherNd[Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](GatherNd_79/params, stack_87)]]

Caused by op 'GatherNd_79', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 478, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 281, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 232, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 397, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2705, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2815, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2869, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-353-ce5f9713d1c2>", line 1, in <module>
    K.eval(K.tf.gather_nd(y_true, K.stack([tf.zeros_like(indices_highest_iou), indices_highest_iou], axis=1)))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1915, in gather_nd
    "GatherNd", params=params, indices=indices, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3271, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): flat indices[559, :] = [0, 4] does not index into param (shape: [6,4]).
	 [[Node: GatherNd_79 = GatherNd[Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](GatherNd_79/params, stack_87)]]


In [354]:
K.eval(K.tile([0], K.shape(indices_highest_iou)[0]))

ValueError: Shape must be rank 1 but is rank 0 for 'Tile_4' (op: 'Tile') with input shapes: [1], [].

In [335]:
K.shape(indices_highest_iou)

<tf.Tensor 'Shape_59:0' shape=(1,) dtype=int32>

In [272]:
def bbox_overlap_iou(bboxes1, bboxes2):
    """
    Args:
        bboxes1: shape (total_bboxes1, 4)
            with x1, y1, x2, y2 point order.
        bboxes2: shape (total_bboxes2, 4)
            with x1, y1, x2, y2 point order.
        p1 *-----
           |     |
           |_____* p2
    Returns:
        Tensor with shape (total_bboxes1, total_bboxes2)
        with the IoU (intersection over union) of bboxes1[i] and bboxes2[j]
        in [i, j].
    """

    x11, y11, x12, y12 = K.tf.split(bboxes1, 4, axis=1)
    x21, y21, x22, y22 = K.tf.split(bboxes2, 4, axis=1)

    xI1 = K.maximum(x11, K.transpose(x21))
    yI1 = K.maximum(y11, K.transpose(y21))

    xI2 = K.minimum(x12, K.transpose(x22))
    yI2 = K.minimum(y12, K.transpose(y22))

    inter_area = (xI2 - xI1 + 1) * (yI2 - yI1 + 1)

    bboxes1_area = (x12 - x11 + 1) * (y12 - y11 + 1)
    bboxes2_area = (x22 - x21 + 1) * (y22 - y21 + 1)

    union = (bboxes1_area + K.transpose(bboxes2_area)) - inter_area

    return K.maximum(inter_area / union, 0)

In [299]:
y_true = y_true[:, :4]
y_pred = y_pred[:, :4]
K.eval(bbox_overlap_iou(y_pred, y_true)).shape

(105, 6)

In [96]:
y_pred.shape

(125574, 4)

In [621]:
def bbox_overlap_iou(bboxes1, bboxes2):
    x11, y11, x12, y12 = np.split(bboxes1, 4, axis=2)
    x21, y21, x22, y22 = np.split(bboxes2, 4, axis=2)
    x11 = np.transpose(x11, (1, 2, 0))
    y11 = np.transpose(y11, (1, 2, 0))
    x12 = np.transpose(x12, (1, 2, 0))
    y12 = np.transpose(y12, (1, 2, 0))

    x21 = np.transpose(x21, (1, 2, 0))
    y21 = np.transpose(y21, (1, 2, 0))
    x22 = np.transpose(x22, (1, 2, 0))
    y22 = np.transpose(y22, (1, 2, 0))

    xI1 = np.maximum(x11, np.transpose(x21))
    yI1 = np.maximum(y11, np.transpose(y21))

    xI2 = np.minimum(x12, np.transpose(x22))
    yI2 = np.minimum(y12, np.transpose(y22))

    inter_area = (xI2 - xI1 + 1) * (yI2 - yI1 + 1)

    bboxes1_area = (x12 - x11 + 1) * (y12 - y11 + 1)
    bboxes2_area = (x22 - x21 + 1) * (y22 - y21 + 1)

    union = (bboxes1_area + np.transpose(bboxes2_area)) - inter_area

    return np.transpose(np.maximum(inter_area / union, 0), (1, 0, 2)) 


def smooth_l1_distance(y_true, y_pred, delta=0.5):
    sigma_squared = delta ** 2

    regression_diff = y_pred - y_true

    regression_diff = np.abs(regression_diff)
    
    return np.where(regression_diff < (1.0 / sigma_squared),
                    0.5 * sigma_squared * np.power(regression_diff, 2),
                    regression_diff - 0.5 / sigma_squared)

In [629]:
def test_attraction(y_true, y_pred):
    y_true = np.expand_dims(y_true, axis=0)[..., :4]
    y_pred = np.expand_dims(y_pred, axis=0)[..., :4]
    
    iou_over_gt = bbox_overlap_iou(y_true, y_pred)

    len_of_batch = y_true.shape[0]
    len_of_gt = y_true.shape[1]
    len_of_pd = y_pred.shape[1]

    a = np.zeros((len_of_batch, len_of_gt, len_of_pd, 5))
    a[..., 0] = iou_over_gt
    a[..., 1:] = y_pred
    maxes = np.argmax(a[..., 0], axis=2)
    pred_boxes_with_max_ious = a[([0] * len_of_batch, range(len_of_gt), maxes[0])][..., 1:]
    return np.sum(smooth_l1_distance(y_true, pred_boxes_with_max_ious)) / np.shape(y_pred)[0]

test_attraction(y_true, y_pred)

0.0003210969503209077

In [613]:
print(indices)

Tensor("transpose_1068:0", shape=(6, 3), dtype=int32)


In [614]:
indices = K.stack([K.tf.tile([0], [6]), K.tf.range(6), maxes[0]])
indices = K.transpose(indices)

In [579]:
K.eval(indices)

array([[     0,      0,  79985],
       [     0,      1,  13817],
       [     0,      2,  57070],
       [     0,      3,  78660],
       [     0,      4, 108176],
       [     0,      5,  25231]], dtype=int32)

<tf.Tensor 'Tile_52:0' shape=(6,) dtype=int32>