In [1]:
import tensorflow as tf
import numpy as np

import sys
sys.path.append('../')

import dataset
import models
from utils import iou

# Seed forwarded to the dataset loader so batches are reproducible.
seed = 42

# Shape handed to the model constructor; presumably (height, width, channels).
input_shape = (448, 448, 3)

In [2]:
# Index the training folder, then build the seeded dataset from that dataframe.
train_df = dataset.get_dataframe('../fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(
    train_df,
    num_epochs=10,
    seed=seed,
)

240it [00:07, 33.84it/s]


In [3]:
# Pull one batch from the dataset; element 0 is used as the model input and
# element 1 as the target.
first_batch = next(iter(train_dataset))
batch_input, batch_target = first_batch[0], first_batch[1]

In [4]:
# Build the YoloV1 model for inputs of `input_shape` and 3 classes.
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


In [5]:
# Forward pass on the first batch with training=True.
batch_output = yolo(batch_input, training=True)

In [6]:
# Target channels: 0-3 -> box, 4 -> objectness, 5+ -> class scores.
t_box, t_obj, t_cls = (
    batch_target[..., :4],
    batch_target[..., 4],
    batch_target[..., 5:],
)

# Output channels: 0-3 -> box 1, 4 -> objectness 1,
#                  5-8 -> box 2, 9 -> objectness 2, 10+ -> class scores.
o_box1, o_obj1 = batch_output[..., :4], batch_output[..., 4]
o_box2, o_obj2 = batch_output[..., 5:9], batch_output[..., 9]
o_cls = batch_output[..., 10:]

In [7]:
def convert_xy_offset_to_xy_center(cellboxes):
    """Convert per-cell (x_offset, y_offset, w, h) boxes to (x_center, y_center, w, h).

    The last axis of ``cellboxes`` is read as ``[x_offset, y_offset, w, h]``
    and the two axes before it as the cell grid (rows, then columns). Each
    offset is shifted by the column / row index of its cell and divided by the
    number of columns / rows, so the centers are expressed as a fraction of
    the whole grid. ``w`` and ``h`` are passed through unchanged.

    The centers of every cell whose ``x_offset`` is not strictly positive are
    set to 0.

    The grid size is taken from the input, so any ``rows x cols`` grid works
    (the row indices used to be built for exactly 7 columns).

    Args:
        cellboxes: tensor of shape ``(..., rows, cols, 4)``.

    Returns:
        Tensor of shape ``(..., rows, cols, 4)`` holding
        ``[x_center, y_center, w, h]`` on the last axis.
    """
    x_offset = cellboxes[..., 0]
    y_offset = cellboxes[..., 1]
    w = cellboxes[..., 2]
    h = cellboxes[..., 3]

    num_h_cells, num_w_cells = x_offset.shape[-2], x_offset.shape[-1]

    # Column index of each cell, shape (cols,): broadcasts along the last axis.
    w_cell_indices = np.arange(num_w_cells)
    # Row index of each cell, shape (rows, 1): broadcasts across the columns.
    h_cell_indices = np.arange(num_h_cells)[:, np.newaxis]

    x_center = (x_offset + w_cell_indices) / num_w_cells
    y_center = (y_offset + h_cell_indices) / num_h_cells

    # NOTE(review): a cell counts as empty when x_offset <= 0. That matches
    # zero-filled target cells, but it also zeroes the center of any predicted
    # box with a non-positive x offset -- confirm this is intended.
    mask = tf.cast(x_offset > 0, tf.float32)
    x_center = x_center * mask
    y_center = y_center * mask

    return tf.stack([x_center, y_center, w, h], axis=-1)

def convert_to_corner_bbox(cellboxes):
    """Turn per-cell (x_offset, y_offset, w, h) boxes into corner boxes.

    The offsets are first converted to box centers with
    ``convert_xy_offset_to_xy_center``; half the width / height is then
    subtracted from and added to the center.

    Returns:
        Tensor whose last axis is ``[x_min, y_min, x_max, y_max]``.
    """
    centered = convert_xy_offset_to_xy_center(cellboxes)

    x_center = centered[..., 0]
    y_center = centered[..., 1]
    half_w = centered[..., 2] / 2
    half_h = centered[..., 3] / 2

    corners = [
        x_center - half_w,  # x_min
        y_center - half_h,  # y_min
        x_center + half_w,  # x_max
        y_center + half_h,  # y_max
    ]
    return tf.stack(corners, axis=-1)

In [8]:
# Target channels: 0-3 -> cell box, 4 -> objectness, 5+ -> class scores.
t_cellbox, t_obj, t_cls = (
    batch_target[..., :4],
    batch_target[..., 4],
    batch_target[..., 5:],
)

# Output channels: two (cell box, objectness) groups followed by class scores.
o_cellbox1, o_obj1 = batch_output[..., :4], batch_output[..., 4]
o_cellbox2, o_obj2 = batch_output[..., 5:9], batch_output[..., 9]
o_cls = batch_output[..., 10:]

# Corner-format boxes, which are what the IoU helper is fed below.
o_corner_bboxes1, o_corner_bboxes2, t_corner_bboxes = (
    convert_to_corner_bbox(o_cellbox1),
    convert_to_corner_bbox(o_cellbox2),
    convert_to_corner_bbox(t_cellbox),
)

In [9]:
# IoU of each predicted box against the target box.
iou_box1, iou_box2 = (
    iou(o_corner_bboxes1, t_corner_bboxes),
    iou(o_corner_bboxes2, t_corner_bboxes),
)

In [10]:
# Append a trailing axis to both IoU tensors.
# Note: re-running this cell on its own appends yet another axis.
iou_box1, iou_box2 = (
    tf.expand_dims(iou_box1, axis=-1),
    tf.expand_dims(iou_box2, axis=-1),
)

In [11]:
# Concatenate both IoU tensors along the last axis.
iou_concat = tf.concat([iou_box1, iou_box2], axis=-1)

In [12]:
# Index (as float32) of the larger IoU along the last axis:
# 0.0 -> box 1, 1.0 -> box 2.
responsible_box = tf.cast(
    tf.math.argmax(iou_concat, axis=-1),
    tf.float32,
)

# **Demo class dist. loss**

## **xy loss**

In [13]:
# First two box channels -> xy, remaining box channels -> wh.
target_xy = t_box[..., :2]
target_wh = t_box[..., 2:]

box1_xy = o_box1[..., :2]
box1_wh = o_box1[..., 2:]

box2_xy = o_box2[..., :2]
box2_wh = o_box2[..., 2:]

In [14]:
# Element-wise squared xy error of each predicted box: (x - x_hat)^2, (y - y_hat)^2
sqr_err_1, sqr_err_2 = (
    tf.square(target_xy - box1_xy),
    tf.square(target_xy - box2_xy),
)

In [15]:
# Add the x and y terms together: (x - x_hat)^2 + (y - y_hat)^2
sum_xy_1 = tf.reduce_sum(sqr_err_1, axis=-1)
sum_xy_2 = tf.reduce_sum(sqr_err_2, axis=-1)

In [16]:
# 1obj_j * [(x - x_hat)^2 + (y - y_hat)^2]
# responsible_box is 0.0 where box 1 was picked and 1.0 where box 2 was,
# so each error only survives for the box that was picked.
predictor_1 = (1 - responsible_box) * sum_xy_1
predictor_2 = responsible_box * sum_xy_2

In [17]:
# 1obj_ij * [(x - x_hat)^2 + (y - y_hat)^2]
# Weight by the target objectness so only cells with t_obj != 0 contribute.
obj_predictor_1 = t_obj * predictor_1
obj_predictor_2 = t_obj * predictor_2

In [18]:
# Per-cell xy error: the box-1 and box-2 terms added together.
xy_predictor = obj_predictor_1 + obj_predictor_2

In [19]:
# Sum over axes 1 and 2 (the cell grid) -> one xy loss per sample.
single_xy_loss = tf.reduce_sum(xy_predictor, axis=[1, 2])

In [20]:
# Average the per-sample xy losses over the batch.
batch_xy_loss = tf.reduce_mean(single_xy_loss)

In [21]:
# Weight applied to the coordinate loss terms.
coord_weight = 5

# Rebuild the weighted loss from single_xy_loss rather than scaling
# batch_xy_loss in place: re-running this cell no longer multiplies the
# loss by coord_weight a second time. The value on a top-to-bottom run
# is unchanged.
batch_xy_loss = coord_weight * tf.reduce_mean(single_xy_loss)
batch_xy_loss

<tf.Tensor: shape=(), dtype=float32, numpy=66.85336>

In [22]:
# xy loss of the first sample in the batch (before coord_weight is applied).
single_xy_loss[0] 

<tf.Tensor: shape=(), dtype=float32, numpy=1.0434695>

## **wh loss**

In [23]:
# Square roots of the sizes; predictions go through abs() first so a
# negative predicted w/h does not produce NaN.
sqrt_target_wh = tf.sqrt(target_wh)
sqrt_box1_wh, sqrt_box2_wh = (
    tf.sqrt(tf.abs(box1_wh)),
    tf.sqrt(tf.abs(box2_wh)),
)

# (sqrt(w) - sqrt(w_hat))^2 , (sqrt(h) - sqrt(h_hat))^2
sqr_err_wh_1, sqr_err_wh_2 = (
    tf.square(sqrt_target_wh - sqrt_box1_wh),
    tf.square(sqrt_target_wh - sqrt_box2_wh),
)

In [24]:
# Add the w and h terms together:
# (sqrt(w) - sqrt(w_hat))^2 + (sqrt(h) - sqrt(h_hat))^2
sum_wh_1 = tf.reduce_sum(sqr_err_wh_1, axis=-1)
sum_wh_2 = tf.reduce_sum(sqr_err_wh_2, axis=-1)

In [25]:
# 1obj_j * [(sqrt(w) - sqrt(w_hat))^2 + (sqrt(h) - sqrt(h_hat))^2]
# Keep each wh error only for the box selected by responsible_box
# (0.0 -> box 1, 1.0 -> box 2).
predictor_wh_1 = (1 - responsible_box) * sum_wh_1
predictor_wh_2 = responsible_box * sum_wh_2

In [26]:
# 1obj_ij * [(sqrt(w) - sqrt(w_hat))^2 + (sqrt(h) - sqrt(h_hat))^2]
# Weight by the target objectness so only cells with t_obj != 0 contribute.
obj_predictor_wh_1 = t_obj * predictor_wh_1
obj_predictor_wh_2 = t_obj * predictor_wh_2

In [27]:
# Per-cell wh error: the box-1 and box-2 terms added together.
wh_predictor = obj_predictor_wh_1 + obj_predictor_wh_2

In [28]:
# Sum over axes 1 and 2 (the cell grid) -> one wh loss per sample.
single_wh_loss = tf.reduce_sum(wh_predictor, axis=[1, 2])

In [29]:
# Average the per-sample wh losses over the batch.
batch_wh_loss = tf.reduce_mean(single_wh_loss)

In [30]:
# Rebuild the weighted loss from single_wh_loss rather than scaling
# batch_wh_loss in place: re-running this cell no longer multiplies the
# loss by coord_weight a second time. The value on a top-to-bottom run
# is unchanged.
batch_wh_loss = coord_weight * tf.reduce_mean(single_wh_loss)
batch_wh_loss

<tf.Tensor: shape=(), dtype=float32, numpy=5.051384>

In [31]:
# wh loss of the first sample in the batch (before coord_weight is applied).
single_wh_loss[0] 

<tf.Tensor: shape=(), dtype=float32, numpy=0.29782522>

## **Combine xy and wh losses**

In [32]:
# Reload the training set with one sample per batch for the gradient check.
# The seed is passed (as in the first load) so the sample inspected below is
# the same on every Restart & Run All.
train_df = dataset.get_dataframe('../fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(
    train_df,
    num_epochs=1,
    batch_size=1,
    seed=seed,
)

240it [00:06, 38.55it/s]


In [33]:
# Take the first (single-sample) batch: element 0 -> input, element 1 -> target.
first_batch = next(iter(train_dataset))
batch_input, batch_target = first_batch[0], first_batch[1]

In [34]:
# Build a new YoloV1 instance (replaces the earlier `yolo`) for the gradient check.
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

In [35]:
# Weight applied to the combined coordinate (xy + wh) loss.
coord_weight = 5

# Record the forward pass and the coordinate loss so the loss can be
# differentiated with respect to the model variables afterwards.
with tf.GradientTape() as tape:
    batch_output = yolo(batch_input, training=True)

    # Target channels: 0-3 -> cell box, 4 -> objectness, 5+ -> class scores.
    t_cellbox, t_obj, t_cls = (
        batch_target[..., :4],
        batch_target[..., 4],
        batch_target[..., 5:],
    )

    # Output channels: two (cell box, objectness) groups, then class scores.
    o_cellbox1, o_obj1 = batch_output[..., :4], batch_output[..., 4]
    o_cellbox2, o_obj2 = batch_output[..., 5:9], batch_output[..., 9]
    o_cls = batch_output[..., 10:]

    # Cell boxes (xy offset, w, h) -> corner boxes, the format fed to iou().
    t_corner_bbox = convert_to_corner_bbox(t_cellbox)
    o_corner_bbox1 = convert_to_corner_bbox(o_cellbox1)
    o_corner_bbox2 = convert_to_corner_bbox(o_cellbox2)

    # IoU of each predicted box with the target, with a trailing axis added
    # so the two can be concatenated and compared.
    iou_box1 = tf.expand_dims(iou(o_corner_bbox1, t_corner_bbox), axis=-1)
    iou_box2 = tf.expand_dims(iou(o_corner_bbox2, t_corner_bbox), axis=-1)
    iou_concat = tf.concat([iou_box1, iou_box2], axis=-1)

    # 0.0 where box 1 has the larger IoU, 1.0 where box 2 does.
    responsible_box = tf.cast(
        tf.math.argmax(iou_concat, axis=-1),
        tf.float32,
    )

    # Split each box into xy (first two channels) and wh (the rest).
    target_xy = t_cellbox[..., :2]
    target_wh = t_cellbox[..., 2:]
    box1_xy = o_cellbox1[..., :2]
    box1_wh = o_cellbox1[..., 2:]
    box2_xy = o_cellbox2[..., :2]
    box2_wh = o_cellbox2[..., 2:]

    # --- xy loss: squared error, kept only for the selected box in cells
    # --- weighted by the target objectness.
    sse_xy_1 = tf.reduce_sum(tf.square(target_xy - box1_xy), axis=-1)
    sse_xy_2 = tf.reduce_sum(tf.square(target_xy - box2_xy), axis=-1)

    xy_predictor_1 = sse_xy_1 * (1 - responsible_box) * t_obj
    xy_predictor_2 = sse_xy_2 * responsible_box * t_obj
    xy_predictor = xy_predictor_1 + xy_predictor_2

    # Sum over the cell grid, then average over the batch.
    xy_loss = tf.reduce_mean(tf.reduce_sum(xy_predictor, axis=[1, 2]))

    # --- wh loss: same scheme on square-rooted sizes; predictions go
    # --- through abs() before the square root.
    target_wh = tf.sqrt(target_wh)
    box1_wh = tf.sqrt(tf.abs(box1_wh))
    box2_wh = tf.sqrt(tf.abs(box2_wh))

    sse_wh_1 = tf.reduce_sum(tf.square(target_wh - box1_wh), axis=-1)
    sse_wh_2 = tf.reduce_sum(tf.square(target_wh - box2_wh), axis=-1)

    wh_predictor_1 = sse_wh_1 * (1 - responsible_box) * t_obj
    wh_predictor_2 = sse_wh_2 * responsible_box * t_obj
    wh_predictor = wh_predictor_1 + wh_predictor_2

    wh_loss = tf.reduce_mean(tf.reduce_sum(wh_predictor, axis=[1, 2]))

    loss = coord_weight * (wh_loss + xy_loss)

# One gradient per trainable variable of the model.
grad = tape.gradient(loss, yolo.trainable_variables)

In [49]:
# Number of non-zero entries in the second-to-last gradient, divided by 4.
# NOTE(review): the 4 presumably stands for the x, y, w, h outputs of the
# single responsible box -- confirm.
tf.reduce_sum(tf.cast(grad[-2] != 0, tf.int32)) / 4

<tf.Tensor: shape=(), dtype=float64, numpy=2048.0>

In [37]:
# Number of non-zero entries in the last gradient.
tf.reduce_sum(tf.cast(grad[-1] != 0, tf.int32))

<tf.Tensor: shape=(), dtype=int32, numpy=4>

In [65]:
# Per-cell xy error attributed to box 1.
xy_predictor_1

<tf.Tensor: shape=(1, 7, 7), dtype=float32, numpy=
array([[[0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 6.3431826, 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ],
        [0.       , 0.       , 0.       , 0.       , 0.       ,
         0.       , 0.       ]]], dtype=float32)>

In [57]:
# Per-cell wh error of the first sample in the batch.
wh_predictor[0]

<tf.Tensor: shape=(7, 7), dtype=float32, numpy=
array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.26016322, 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ]], dtype=float32)>