In [1]:
import tensorflow as tf
import numpy as np
import dataset
import models
from utils import iou

# Input shape handed to models.YoloV1 (presumably height, width, channels -- confirm).
input_shape = (448, 448, 3)
# Seed forwarded to dataset.load_dataset_from_df.
seed = 42

In [2]:
# Build the dataframe for the training folder, then turn it into the dataset
# that is iterated below (num_epochs=10, seeded with `seed`).
train_df = dataset.get_dataframe('./fruits_dataset/train')
train_dataset = dataset.load_dataset_from_df(train_df, num_epochs=10, seed=seed)

240it [00:05, 47.07it/s]


In [3]:
# Pull a single batch off the dataset; element 0 is used as the model input
# and element 1 as the target.
first_batch = next(iter(train_dataset))
batch_input, batch_target = first_batch[0], first_batch[1]

In [4]:
# Instantiate the project's YoloV1 model for the configured input shape and 3 classes.
yolo = models.YoloV1(input_shape=input_shape, num_classes=3)

In [5]:
# Forward pass on the first batch; the result is sliced per channel below.
batch_output = yolo(batch_input)

In [6]:
# Target, last axis: channels 0-3 -> box, channel 4 -> obj, channels 5+ -> cls.
t_box = batch_target[..., :4]
t_obj = batch_target[..., 4]
t_cls = batch_target[..., 5:]

# Output, last axis: channels 0-3 -> box 1, channel 4 -> obj 1,
# channels 5-8 -> box 2, channel 9 -> obj 2, channels 10+ -> cls.
o_box1 = batch_output[..., :4]
o_obj1 = batch_output[..., 4]
o_box2 = batch_output[..., 5:9]
o_obj2 = batch_output[..., 9]
o_cls = batch_output[..., 10:]

In [7]:
def convert_xy_offset_to_xy_center(cellboxes):
    """Convert per-cell (x_offset, y_offset, w, h) boxes to (x_center, y_center, w, h).

    The last axis of `cellboxes` is read as (x_offset, y_offset, w, h) and the
    two axes before it as grid rows and grid columns. Each offset is shifted by
    the column/row index of its cell and divided by the number of columns/rows.
    Width and height are passed through unchanged.

    Centers of cells whose x_offset is not strictly positive are set to 0.
    NOTE(review): this also zeroes centers whose x_offset is negative or
    exactly 0 -- confirm that is intended for raw predictions.

    Works for any grid size; the row-index grid is no longer tied to a
    hard-coded width of 7.

    Args:
        cellboxes: tensor with trailing axes (rows, cols, >=4). Presumably
            float32 (the mask below is float32); the grid dimensions must be
            statically known.

    Returns:
        Tensor with trailing axes (rows, cols, 4) holding
        (x_center, y_center, w, h).
    """
    x_offset = cellboxes[..., 0]
    y_offset = cellboxes[..., 1]
    w = cellboxes[..., 2]
    h = cellboxes[..., 3]

    num_w_cells = x_offset.shape[-1]
    num_h_cells = x_offset.shape[-2]

    # Column index of every cell, shape (cols,): [0, 1, 2, ...].
    # Broadcasts over the rows and over any leading axes.
    w_cell_indices = np.arange(num_w_cells, dtype=np.float32)

    # Row index of every cell, shape (rows, 1): [[0], [1], [2], ...].
    # Broadcasts over the columns, so no grid width has to be spelled out.
    h_cell_indices = np.arange(num_h_cells, dtype=np.float32)[:, np.newaxis]

    x_center = (x_offset + w_cell_indices) / num_w_cells
    y_center = (y_offset + h_cell_indices) / num_h_cells

    # Without the mask, a cell with zero offset would still get a non-zero
    # center from its cell index.
    mask = tf.cast((x_offset > 0), tf.float32)
    x_center *= mask
    y_center *= mask

    xy = tf.stack([x_center, y_center], axis=-1)
    w = tf.expand_dims(w, -1)
    h = tf.expand_dims(h, -1)

    return tf.concat([xy, w, h], axis=-1)

def convert_to_corner_bbox(cellboxes):
    """Turn per-cell offset boxes into corner boxes.

    The boxes are first converted to (x_center, y_center, w, h) with
    `convert_xy_offset_to_xy_center`, then half the width/height is
    subtracted from and added to the center.

    Returns:
        Tensor whose last axis is (x_min, y_min, x_max, y_max).
    """
    centers = convert_xy_offset_to_xy_center(cellboxes)

    cx = centers[..., 0]
    cy = centers[..., 1]
    half_w = centers[..., 2] / 2
    half_h = centers[..., 3] / 2

    return tf.stack(
        [cx - half_w, cy - half_h, cx + half_w, cy + half_h],
        axis=-1,
    )

In [8]:
# Convert the target boxes and both predicted boxes to corner format
# (x_min, y_min, x_max, y_max); these feed the IoU computation below.
t_corner_bboxes = convert_to_corner_bbox(t_box)
o_corner_bboxes1 = convert_to_corner_bbox(o_box1)
o_corner_bboxes2 = convert_to_corner_bbox(o_box2)

In [9]:
# Compare each predicted box with the target box via utils.iou
# (presumably one IoU value per grid cell -- confirm against utils).
iou_box1 = iou(o_corner_bboxes1, t_corner_bboxes)
iou_box2 = iou(o_corner_bboxes2, t_corner_bboxes)

In [10]:
# Recompute from the corner boxes instead of expanding `iou_box1`/`iou_box2`
# in place: the in-place form appended one more trailing axis on every re-run
# of this cell, which silently changed the broadcasting further down.
iou_box1 = tf.expand_dims(iou(o_corner_bboxes1, t_corner_bboxes), -1)
iou_box2 = tf.expand_dims(iou(o_corner_bboxes2, t_corner_bboxes), -1)

In [12]:
# Put the two IoU maps side by side on the last axis (index 0 = box 1, index 1 = box 2).
iou_concat = tf.concat([iou_box1, iou_box2], axis=-1)

In [187]:
# Index of the predicted box with the larger IoU (0.0 -> box 1, 1.0 -> box 2),
# stored as float32 so it can be used as a multiplicative selector.
responsible_box = tf.cast(tf.math.argmax(iou_concat, axis=-1), tf.float32)

# **Demo class dist. loss**

## **xy loss**

In [229]:
# Split every box into its (x, y) part (first two values) and its (w, h) part (the rest).
target_xy, target_wh = t_box[..., :2], t_box[..., 2:]

box1_xy, box1_wh = o_box1[..., :2], o_box1[..., 2:]

box2_xy, box2_wh = o_box2[..., :2], o_box2[..., 2:]

In [230]:
# Element-wise squared error of the xy values, per predicted box:
# (x - x_hat)^2, (y - y_hat)^2
sqr_err_1 = tf.square(target_xy - box1_xy)
sqr_err_2 = tf.square(target_xy - box2_xy)

In [231]:
# Add the x and y terms by summing over the last axis:
# [(x - x_hat)^2 + (y - y_hat)^2]
sum_xy_1 = tf.reduce_sum(sqr_err_1, -1)
sum_xy_2 = tf.reduce_sum(sqr_err_2, -1)

In [232]:
# 1obj_j * [(x - x_hat)^2 + (y - y_hat)^2]
# responsible_box is 0.0 where box 1 won the IoU argmax and 1.0 where box 2
# did, so at most one of the two terms below is non-zero.
predictor_1 = sum_xy_1 * (1 - responsible_box)
predictor_2 = sum_xy_2 * responsible_box

In [234]:
# 1obj_ij * [(x - x_hat)^2 + (y - y_hat)^2]
# Weight by the target obj channel (presumably 1 where a cell contains an
# object and 0 elsewhere -- confirm against the dataset encoding).
obj_predictor_1 = predictor_1 * t_obj
obj_predictor_2 = predictor_2 * t_obj

In [255]:
# Merge both predictors; only the responsible one contributes a non-zero value.
xy_predictor = obj_predictor_1 + obj_predictor_2

In [273]:
# Sum over axes 1 and 2 (presumably the grid rows and columns), leaving one
# value per entry of axis 0 (presumably one per image -- confirm).
single_loss = tf.reduce_sum(xy_predictor, [1, 2])

In [298]:
# Average over all remaining values to get a single scalar.
batch_loss = tf.reduce_mean(single_loss)

In [299]:
# Weight of the coordinate loss (presumably lambda_coord from the YOLOv1 paper).
coord_weight = 5
# Derive the weighted loss from `single_loss` rather than `batch_loss *= coord_weight`:
# the in-place form multiplied the loss by 5 again on every re-run of this cell.
batch_loss = coord_weight * tf.reduce_mean(single_loss)
batch_loss

<tf.Tensor: shape=(), dtype=float32, numpy=4.208332>

## **wh loss**

In [313]:
# (sqrt(w) - sqrt(w_hat))^2, (sqrt(h) - sqrt(h_hat))^2
sqrt_target_wh = tf.sqrt(target_wh)

# Predicted w/h are not guaranteed to be non-negative (that depends on the
# model's output activation), and tf.sqrt returns NaN for negative inputs,
# which would turn the whole loss into NaN. Use a sign-preserving square root
# instead; it equals tf.sqrt for every non-negative value.
sqrt_box1_wh = tf.sign(box1_wh) * tf.sqrt(tf.abs(box1_wh))
sqrt_box2_wh = tf.sign(box2_wh) * tf.sqrt(tf.abs(box2_wh))

sqr_err_wh_1 = tf.square(sqrt_target_wh - sqrt_box1_wh)
sqr_err_wh_2 = tf.square(sqrt_target_wh - sqrt_box2_wh)

In [327]:
# [(sqrt(w) - sqrt(w_hat))^2 + (sqrt(h) - sqrt(h_hat))^2]
