In [1]:
import tensorflow as tf
import numpy as np

import sys
sys.path.append('../')

import dataset
import models
from utils import iou

input_shape = (448, 448, 3)
seed = 42

In [2]:
train_df = dataset.get_dataframe('../fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(train_df, num_epochs=10, seed=seed)

240it [00:05, 47.39it/s]


In [3]:
first_batch = next(iter(train_dataset))
batch_input = first_batch[0]
batch_target = first_batch[1]

In [4]:
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


In [5]:
batch_output = yolo(batch_input, training=True)

In [6]:
t_cellbox = batch_target[..., :4]
t_obj = batch_target[..., 4]
t_cls = batch_target[..., 5:]

o_cellbox1 = batch_output[..., :4]
o_obj1 = batch_output[..., 4]
o_cellbox2 = batch_output[..., 5:9]
o_obj2 = batch_output[..., 9]
o_cls = batch_output[..., 10:]

In [7]:
def convert_xy_offset_to_xy_center(cellboxes):
    x_offset = cellboxes[..., 0]
    y_offset = cellboxes[..., 1]
    w = cellboxes[..., 2]
    h = cellboxes[..., 3]
    
    num_w_cells = x_offset.shape[-1]
    num_h_cells = x_offset.shape[-2]
    
    # w_cell_indices: [[0, 1, 2, ...], [0, 1, 2, ...], ...]
    w_cell_indices = np.array(range(num_w_cells))
    w_cell_indices = np.broadcast_to(w_cell_indices, x_offset.shape)
    
    # h_cell_indices: [[0, 0, 0, ...], [1, 1, 1, ...], ....]
    h_cell_indices = np.array(range(num_h_cells))
    h_cell_indices = np.repeat(h_cell_indices, 7, 0).reshape(x_offset.shape[-2:])
    h_cell_indices = np.broadcast_to(h_cell_indices, x_offset.shape)
    
    x_center = (x_offset + w_cell_indices) / num_w_cells
    y_center = (y_offset + h_cell_indices) / num_h_cells
    
    mask = tf.cast((x_offset > 0), tf.float32)
    
    x_center *= mask
    y_center *= mask
    
    xy = tf.stack([x_center, y_center], axis=-1)
    
    w = tf.expand_dims(w, -1)
    h = tf.expand_dims(h, -1)
    
    bboxes = tf.concat([xy, w, h], axis=-1)
    
    return bboxes

def convert_to_corner_bbox(cellboxes):
    bboxes = convert_xy_offset_to_xy_center(cellboxes)
    x, y, w, h = bboxes[..., 0], bboxes[..., 1], bboxes[..., 2], bboxes[..., 3]
    
    x_min = x - (w / 2)
    y_min = y - (h / 2)
    x_max = x + (w / 2)
    y_max = y + (h / 2)
    
    corner_bboxes = tf.stack([x_min, y_min, x_max, y_max], axis=-1)
        
    return corner_bboxes

In [8]:
t_corner_bbox = convert_to_corner_bbox(t_cellbox)
o_corner_bbox1 = convert_to_corner_bbox(o_cellbox1)
o_corner_bbox2 = convert_to_corner_bbox(o_cellbox2)

In [9]:
iou_box1 = iou(o_corner_bbox1, t_corner_bbox)
iou_box2 = iou(o_corner_bbox2, t_corner_bbox)

In [10]:
ious = tf.stack([iou_box1, iou_box2], axis=-1)

In [11]:
best_box = tf.math.argmax(ious, axis=-1)
best_box = tf.cast(best_box, tf.float32)

# **Demo confidence loss**

## **Obj loss**

In [12]:
predicted_obj1 = o_obj1 * t_obj * (1 - best_box)
predicted_obj2 = o_obj2 * t_obj * best_box 

In [13]:
predicted_obj = predicted_obj1 + predicted_obj2

In [14]:
sqr_err = tf.square(t_obj - predicted_obj)

In [15]:
single_loss = tf.reduce_sum(sqr_err, [1, 2])

In [16]:
batch_loss = tf.reduce_mean(single_loss)
batch_loss

<tf.Tensor: shape=(), dtype=float32, numpy=5.0211525>

In [17]:
single_loss[0]

<tf.Tensor: shape=(), dtype=float32, numpy=1.4291753>

## **No obj loss**

In [18]:
t_noobj = 1 - t_obj
noobj_weight = 0.5

In [19]:
predicted_noobj1 = o_obj1 * t_noobj
predicted_noobj2 = o_obj2 * t_noobj

In [20]:
sqr_err_noobj1 = tf.square((t_obj * t_noobj) - predicted_noobj1)
sqr_err_noobj2 = tf.square((t_obj * t_noobj) - predicted_noobj2)

In [21]:
sqr_err_noobj = tf.reduce_sum(sqr_err_noobj1 + sqr_err_noobj2, [1, 2])

In [22]:
batch_loss = noobj_weight * tf.reduce_mean(sqr_err_noobj)

In [23]:
batch_loss

<tf.Tensor: shape=(), dtype=float32, numpy=116.471176>

In [24]:
sqr_err_noobj[0]

<tf.Tensor: shape=(), dtype=float32, numpy=214.69095>

## **Combine obj & no-obj loss**

In [25]:
train_df = dataset.get_dataframe('../fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(train_df, num_epochs=1, batch_size=1)

240it [00:04, 48.45it/s]


In [26]:
first_batch = next(iter(train_dataset))
batch_input = first_batch[0]
batch_target = first_batch[1]

In [27]:
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

In [28]:
with tf.GradientTape() as tape:
    batch_output = yolo(batch_input, training=True)
    noobj_weight = 0.5
    
    # Get xywh, obj, class
    t_cellbox = batch_target[..., :4]
    t_obj = batch_target[..., 4]
    t_cls = batch_target[..., 5:]
    
    o_cellbox1 = batch_output[..., :4]
    o_obj1 = batch_output[..., 4]
    o_cellbox2 = batch_output[..., 5:9]
    o_obj2 = batch_output[..., 9]
    o_cls = batch_output[..., 10:]
    
    # Get the highest iou
    t_corner_bbox = convert_to_corner_bbox(t_cellbox)
    o_corner_bbox1 = convert_to_corner_bbox(o_cellbox1)
    o_corner_bbox2 = convert_to_corner_bbox(o_cellbox2)
    
    iou_box1 = iou(o_corner_bbox1, t_corner_bbox)
    iou_box2 = iou(o_corner_bbox2, t_corner_bbox)
    ious = tf.stack([iou_box1, iou_box2], axis=-1)
    best_box = tf.math.argmax(ious, axis=-1)
    best_box = tf.cast(best_box, tf.float32)
    
    # Compute no obj loss
    noobj_mask = 1 - t_obj
    o_noobj1, o_noobj2 = o_obj1 * noobj_mask, o_obj2 * noobj_mask
    sqr_err_noobj1 = tf.square((t_obj * noobj_mask) - o_noobj1)
    sqr_err_noobj2 = tf.square((t_obj * noobj_mask) - o_noobj2)
    single_sse_noobj = tf.reduce_sum(sqr_err_noobj1 + sqr_err_noobj2, [1, 2])
    noobj_loss = noobj_weight * tf.reduce_mean(single_sse_noobj)
    
    # Compute obj loss
    predicted_obj1 = o_obj1 * t_obj * (1 - best_box)
    predicted_obj2 = o_obj2 * t_obj * best_box 
    predicted_obj = predicted_obj1 + predicted_obj2
    sqr_err_obj = tf.square(t_obj - predicted_obj)
    single_sse_obj = tf.reduce_sum(sqr_err_obj, [1, 2])
    obj_loss = tf.reduce_mean(single_sse_obj)
    
    loss =  obj_loss + (noobj_weight * noobj_loss)
    
grad = tape.gradient(loss, yolo.trainable_variables)

In [29]:
tf.reduce_sum(tf.cast(grad[-1] != 0, tf.int32)) == 97

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [34]:
tf.reduce_sum(tf.cast(grad[-2] != 0, tf.int32)) / 97

<tf.Tensor: shape=(), dtype=float64, numpy=2090.0>

In [31]:
loss

<tf.Tensor: shape=(), dtype=float32, numpy=48.815926>

In [32]:
single_loss[0] + (noobj_weight * sqr_err_noobj[0])

<tf.Tensor: shape=(), dtype=float32, numpy=108.77465>

In [45]:
sqr_err_noobj1

<tf.Tensor: shape=(1, 7, 7), dtype=float32, numpy=
array([[[1.0665561e+00, 5.2230082e-02, 4.0848107e+00, 7.8818709e-02,
         7.1577220e+00, 1.0765925e-01, 9.2036295e+00],
        [3.0819429e-02, 1.1454989e+00, 3.6553915e+00, 3.2560072e+00,
         1.5911919e+00, 1.1613204e-03, 3.7922988e+00],
        [3.3286065e-02, 6.3593704e-01, 7.4153000e-01, 2.5554390e+00,
         2.4054742e+00, 3.6368861e+00, 1.0608184e+00],
        [3.1570528e+00, 3.3589937e-02, 5.3453803e-01, 0.0000000e+00,
         2.4120870e+00, 2.0498619e+00, 3.0602882e+00],
        [5.2292359e-01, 2.7425954e+00, 1.7008550e+00, 5.0593589e-02,
         1.6772571e-01, 2.1912716e-02, 2.9453502e+00],
        [8.7318286e-02, 1.1806891e+00, 5.2780499e+00, 1.8701401e-02,
         1.4212713e-02, 3.3317404e+00, 5.9079771e+00],
        [4.7128768e+00, 5.6554375e+00, 7.2541757e+00, 4.9465972e-01,
         4.2607225e-02, 7.5958896e-01, 1.5972475e+00]]], dtype=float32)>

In [46]:
sqr_err_noobj2

<tf.Tensor: shape=(1, 7, 7), dtype=float32, numpy=
array([[[1.6340977e+00, 9.7928280e-01, 4.7584634e+00, 8.2056367e-01,
         8.9722151e-01, 1.4834647e+01, 1.2269311e+00],
        [7.9363722e-01, 6.7697883e-01, 1.5157050e-02, 4.2515561e-02,
         3.4512694e+00, 2.2909334e+00, 4.1265502e+00],
        [8.6343545e-01, 9.4270295e-01, 9.8278545e-02, 1.0083281e+01,
         2.4014802e-01, 3.3784911e-02, 2.3989564e-02],
        [7.0858437e-01, 3.7596505e-02, 6.8288267e-02, 0.0000000e+00,
         7.6473528e-01, 5.3322180e-03, 5.4734826e-01],
        [7.4791573e-02, 3.3044143e+00, 2.9444575e-02, 6.5828619e+00,
         4.5347050e-01, 3.6820149e+00, 1.6119602e-01],
        [1.0114859e+00, 1.5895953e+00, 4.2613583e+00, 4.9938977e-02,
         7.0686988e-04, 1.0006739e+00, 2.2473950e+00],
        [2.7570477e-01, 5.4884019e+00, 1.4896633e-01, 5.7496971e-01,
         6.5689909e-01, 7.5091702e-07, 6.0223069e+00]]], dtype=float32)>

In [50]:
predicted_obj2

<tf.Tensor: shape=(1, 7, 7), dtype=float32, numpy=
array([[[ 0.,  0.,  0.,  0.,  0., -0., -0.],
        [-0., -0., -0., -0.,  0.,  0., -0.],
        [-0., -0., -0.,  0., -0.,  0., -0.],
        [ 0.,  0., -0.,  0., -0., -0.,  0.],
        [-0.,  0.,  0., -0., -0., -0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., -0., -0.],
        [-0.,  0., -0., -0., -0.,  0.,  0.]]], dtype=float32)>