In [1]:
import tensorflow as tf
import numpy as np
import dataset
import models
from utils import iou

input_shape = (448, 448, 3)
seed = 42

In [2]:
train_df = dataset.get_dataframe('./fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(train_df, num_epochs=10, seed=seed)

240it [00:08, 27.25it/s]


In [3]:
first_batch = next(iter(train_dataset))
batch_input = first_batch[0]
batch_target = first_batch[1]

In [4]:
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

In [5]:
batch_output = yolo(batch_input)

In [6]:
t_cellbox = batch_target[..., :4]
t_obj = batch_target[..., 4]
t_cls = batch_target[..., 5:]

o_cellbox1 = batch_output[..., :4]
o_obj1 = batch_output[..., 4]
o_cellbox2 = batch_output[..., 5:9]
o_obj2 = batch_output[..., 9]
o_cls = batch_output[..., 10:]

In [7]:
def convert_xy_offset_to_xy_center(cellboxes):
    x_offset = cellboxes[..., 0]
    y_offset = cellboxes[..., 1]
    w = cellboxes[..., 2]
    h = cellboxes[..., 3]
    
    num_w_cells = x_offset.shape[-1]
    num_h_cells = x_offset.shape[-2]
    
    # w_cell_indices: [[0, 1, 2, ...], [0, 1, 2, ...], ...]
    w_cell_indices = np.array(range(num_w_cells))
    w_cell_indices = np.broadcast_to(w_cell_indices, x_offset.shape)
    
    # h_cell_indices: [[0, 0, 0, ...], [1, 1, 1, ...], ....]
    h_cell_indices = np.array(range(num_h_cells))
    h_cell_indices = np.repeat(h_cell_indices, 7, 0).reshape(x_offset.shape[-2:])
    h_cell_indices = np.broadcast_to(h_cell_indices, x_offset.shape)
    
    x_center = (x_offset + w_cell_indices) / num_w_cells
    y_center = (y_offset + h_cell_indices) / num_h_cells
    
    mask = tf.cast((x_offset > 0), tf.float32)
    
    x_center *= mask
    y_center *= mask
    
    xy = tf.stack([x_center, y_center], axis=-1)
    
    w = tf.expand_dims(w, -1)
    h = tf.expand_dims(h, -1)
    
    bboxes = tf.concat([xy, w, h], axis=-1)
    
    return bboxes

def convert_to_corner_bbox(cellboxes):
    bboxes = convert_xy_offset_to_xy_center(cellboxes)
    x, y, w, h = bboxes[..., 0], bboxes[..., 1], bboxes[..., 2], bboxes[..., 3]
    
    x_min = x - (w / 2)
    y_min = y - (h / 2)
    x_max = x + (w / 2)
    y_max = y + (h / 2)
    
    corner_bboxes = tf.stack([x_min, y_min, x_max, y_max], axis=-1)
        
    return corner_bboxes

In [8]:
t_corner_bbox = convert_to_corner_bbox(t_cellbox)
o_corner_bbox1 = convert_to_corner_bbox(o_cellbox1)
o_corner_bbox2 = convert_to_corner_bbox(o_cellbox2)

In [9]:
iou_box1 = iou(o_corner_bbox1, t_corner_bbox)
iou_box2 = iou(o_corner_bbox2, t_corner_bbox)

In [10]:
ious = tf.stack([iou_box1, iou_box2], axis=-1)

In [11]:
best_box = tf.math.argmax(ious, axis=-1)
best_box = tf.cast(best_box, tf.float32)

# **Demo confidence loss**

## **Obj loss**

In [12]:
predicted_obj1 = o_obj1 * t_obj * (1 - best_box)
predicted_obj2 = o_obj2 * t_obj * best_box 

In [13]:
predicted_obj = predicted_obj1 + predicted_obj2

In [14]:
sqr_err = tf.square(t_obj - predicted_obj)

In [15]:
single_loss = tf.reduce_sum(sqr_err, [1, 2])

In [16]:
batch_loss = tf.reduce_mean(single_loss)
batch_loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.5625057>

In [17]:
single_loss[0]

<tf.Tensor: shape=(), dtype=float32, numpy=0.9999846>

## **No obj loss**

In [18]:
t_noobj = 1 - t_obj
noobj_weight = 0.5

In [19]:
predicted_noobj1 = o_obj1 * t_noobj
predicted_noobj2 = o_obj2 * t_noobj

In [20]:
sqr_err_noobj1 = tf.square((t_obj * t_noobj) - predicted_noobj1)
sqr_err_noobj2 = tf.square((t_obj * t_noobj) - predicted_noobj2)

In [21]:
sqr_err_noobj = tf.reduce_sum(sqr_err_noobj1 + sqr_err_noobj2, [1, 2])

In [22]:
batch_loss = noobj_weight * tf.reduce_mean(sqr_err_noobj)

In [23]:
batch_loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.0097031e-08>

In [24]:
sqr_err_noobj[0]

<tf.Tensor: shape=(), dtype=float32, numpy=2.1970942e-08>

## **Combine obj & no-obj loss**

In [25]:
train_df = dataset.get_dataframe('./fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(train_df, num_epochs=1, batch_size=1)

240it [00:05, 46.33it/s]


In [26]:
first_batch = next(iter(train_dataset))
batch_input = first_batch[0]
batch_target = first_batch[1]

In [27]:
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

In [35]:
with tf.GradientTape() as tape:
    batch_output = yolo(batch_input)
    noobj_weight = 0.5
    
    # Get xywh, obj, class
    t_cellbox = batch_target[..., :4]
    t_obj = batch_target[..., 4]
    t_cls = batch_target[..., 5:]
    
    o_cellbox1 = batch_output[..., :4]
    o_obj1 = batch_output[..., 4]
    o_cellbox2 = batch_output[..., 5:9]
    o_obj2 = batch_output[..., 9]
    o_cls = batch_output[..., 10:]
    
    # Get the highest iou
    t_corner_bbox = convert_to_corner_bbox(t_cellbox)
    o_corner_bbox1 = convert_to_corner_bbox(o_cellbox1)
    o_corner_bbox2 = convert_to_corner_bbox(o_cellbox2)
    
    iou_box1 = iou(o_corner_bbox1, t_corner_bbox)
    iou_box2 = iou(o_corner_bbox2, t_corner_bbox)
    ious = tf.stack([iou_box1, iou_box2], axis=-1)
    best_box = tf.math.argmax(ious, axis=-1)
    best_box = tf.cast(best_box, tf.float32)
    
    # Compute no obj loss
    noobj_mask = 1 - t_obj
    o_noobj1, o_noobj2 = o_obj1 * noobj_mask, o_obj2 * noobj_mask
    sqr_err_noobj1 = tf.square((t_obj * noobj_mask) - o_noobj1)
    sqr_err_noobj2 = tf.square((t_obj * noobj_mask) - o_noobj2)
    single_sse_noobj = tf.reduce_sum(sqr_err_noobj1 + sqr_err_noobj2, [1, 2])
    noobj_loss = noobj_weight * tf.reduce_mean(single_sse_noobj)
    
    # Compute obj loss
    predicted_obj1 = o_obj1 * t_obj * (1 - best_box)
    predicted_obj2 = o_obj2 * t_obj * best_box 
    predicted_obj = predicted_obj1 + predicted_obj2
    sqr_err_obj = tf.square(t_obj - predicted_obj)
    single_sse_obj = tf.reduce_sum(sqr_err_obj, [1, 2])
    obj_loss = tf.reduce_mean(single_sse_obj)
    
    loss =  obj_loss + (noobj_weight * noobj_loss)
    
grad = tape.gradient(loss, yolo.trainable_variables)

In [36]:
tf.reduce_sum(tf.cast(grad[-1] != 0, tf.int32)) == 97

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [37]:
tf.reduce_sum(tf.cast(grad[-2] != 0, tf.int32)) == 4096 * 97

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [38]:
loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.0000072>

In [39]:
single_loss[0] + (noobj_weight * sqr_err_noobj[0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.9999846>