In [None]:
# Mount Google Drive under /content/gdrive (Colab only); the pretrained weight
# file loaded later in this notebook is read from a path below this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import math
import datetime
import os
import time

import numpy as np
import tensorflow as tf

from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers import BatchNormalization

In [None]:
def custom_tiny_model(num_classes):
    """
    Build the tiny-YOLOv3-style detector used for transfer learning.

    The network takes a 416x416x3 image. Layers 0-6 are the backbone and use
    plain names ('conv_N', 'bnorm_N', 'leaky_N', 'bool_N'). Every layer from
    index 7 onwards belongs to the detection head and carries the 'TL_' prefix,
    which is the marker `TL_tiny_model` uses to decide what stays trainable.

    NOTE: the batch-norm and activation of block 7 used to be named 'bnorm_7'
    and 'leaky_7', so they were frozen although their convolution ('TL_conv_7')
    was trainable. They are now 'TL_bnorm_7' / 'TL_leaky_7'. Weights saved from
    the old model still load by topology, but not with `by_name=True`.

    inputs:
    num_classes: number of object classes; each head emits (num_classes + 5) * 3 channels
    outputs:
    new_model: tf.keras.Model with outputs (y_small, y_large); y_small is the
               26x26 grid, y_large the 13x13 grid, both reshaped to
               (batch, grid, grid, 3, num_classes + 5)
    """

    def conv_bn_leaky(tensor, n_filters, kernel_size, idx, prefix=''):
        # Conv2D (no bias) -> BatchNormalization -> LeakyReLU(0.1), all sharing one index.
        tag = str(idx)
        tensor = Conv2D(n_filters, kernel_size, strides=1, padding='same',
                        use_bias=False, name=prefix + 'conv_' + tag)(tensor)
        tensor = BatchNormalization(epsilon=0.001, name=prefix + 'bnorm_' + tag)(tensor)
        return LeakyReLU(alpha=0.1, name=prefix + 'leaky_' + tag)(tensor)

    input_image = Input(shape=(416, 416, 3))
    final_out = (num_classes + 5) * 3

    # ------------------------------ backbone ------------------------------
    x = input_image
    filters = 16
    for i in range(4):  # idx 0..3: 16, 32, 64, 128 filters, each followed by a stride-2 pool
        x = conv_bn_leaky(x, filters, 3, i)
        # 'bool_' (sic) is kept as-is so existing layer names do not change
        x = MaxPool2D(pool_size=[2, 2], strides=[2, 2], padding='same', name='bool_' + str(i))(x)
        filters *= 2

    # idx 4: 256 filters; its activation is the skip connection for the 26x26 head
    x = conv_bn_leaky(x, filters, 3, 4)
    skip_2 = x
    x = MaxPool2D(pool_size=[2, 2], strides=[2, 2], padding='same', name='bool_4')(x)

    # idx 5: 512 filters, followed by a stride-1 pool (spatial size unchanged)
    filters *= 2
    x = conv_bn_leaky(x, filters, 3, 5)
    x = MaxPool2D(pool_size=[2, 2], strides=[1, 1], padding='same', name='bool_5')(x)

    # idx 6: 1024 filters
    filters *= 2
    x = conv_bn_leaky(x, filters, 3, 6)

    # ------------------- detection head (transfer learning) -------------------
    # idx 7: 1x1 bottleneck; every layer of the block is 'TL_'-prefixed (see NOTE above)
    x = conv_bn_leaky(x, 256, 1, 7, prefix='TL_')
    skip_1 = x

    # idx 8-9: 13x13 branch
    x = conv_bn_leaky(x, 512, 3, 8, prefix='TL_')
    y_large = Conv2D(final_out, 1, strides=1, padding='same', name='TL_conv_9')(x)

    # idx 10-14: 26x26 branch, built from skip_1 upsampled and concatenated with skip_2
    x = conv_bn_leaky(skip_1, 128, 1, 10, prefix='TL_')
    x = UpSampling2D(2, name='TL_upsampling_11')(x)
    x = Concatenate(name='TL_concatenate_12')([x, skip_2])
    x = conv_bn_leaky(x, 256, 3, 13, prefix='TL_')
    y_small = Conv2D(final_out, 1, strides=1, padding='same', name='TL_conv_14')(x)

    # split the channel axis into (anchor, 5 + num_classes)
    y_small_shape = tf.shape(y_small)
    y_large_shape = tf.shape(y_large)
    y_small = tf.reshape(y_small, (y_small_shape[0], y_small_shape[1], y_small_shape[2], 3, -1), name='TL_reshape_small')
    y_large = tf.reshape(y_large, (y_large_shape[0], y_large_shape[1], y_large_shape[2], 3, -1), name='TL_reshape_large')
    new_model = tf.keras.Model(input_image, (y_small, y_large))
    return new_model

In [None]:
def TL_tiny_model(num_classes):
    """
    Build `custom_tiny_model(num_classes)` and freeze everything except the head.

    A layer is left trainable only when its name starts with "TL"; every other
    layer gets `trainable = False`.

    inputs:
    num_classes: number of object classes, forwarded to `custom_tiny_model`
    outputs:
    model: the same Keras model with the non-"TL" layers frozen
    """
    model = custom_tiny_model(num_classes)
    frozen_layers = [layer for layer in model.layers
                     if not layer.name.startswith("TL")]
    for frozen in frozen_layers:
        frozen.trainable = False
    return model

In [None]:
# Sanity check: build the transfer-learning model for 6 classes and print its layer table.
TL_tiny_model(6).summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 416, 416, 3) 0                                            
__________________________________________________________________________________________________
conv_0 (Conv2D)                 (None, 416, 416, 16) 432         input_1[0][0]                    
__________________________________________________________________________________________________
bnorm_0 (BatchNormalization)    (None, 416, 416, 16) 64          conv_0[0][0]                     
__________________________________________________________________________________________________
leaky_0 (LeakyReLU)             (None, 416, 416, 16) 0           bnorm_0[0][0]                    
_______________________________________________________________________________________

In [None]:
def make_tiny_yolov3_model():
    """
    Build the reference tiny-YOLOv3 network for 80 classes.

    The input is a 416x416x3 image. Layer names ('conv_N', 'bnorm_N',
    'leaky_N', 'bool_N', 'upsampling_11', 'concatenate_12') and the output
    order are part of the contract: `our_tiny_model` loads weights into this
    model and looks up 'leaky_4' and 'leaky_7' by name.

    outputs:
    a tf.keras.Model with outputs (y_large, y_small); y_large is the 13x13
    head, y_small the 26x26 head, each with (80 + 5) * 3 channels
    """

    def conv_block(tensor, width, kernel, idx):
        # Conv2D (no bias) -> BatchNormalization -> LeakyReLU(0.1), named by idx.
        suffix = str(idx)
        tensor = Conv2D(width, kernel, strides=1, padding='same', use_bias=False,
                        name='conv_' + suffix)(tensor)
        tensor = BatchNormalization(epsilon=0.001, name='bnorm_' + suffix)(tensor)
        return LeakyReLU(alpha=0.1, name='leaky_' + suffix)(tensor)

    def pool(tensor, stride, idx):
        # 2x2 max pooling; the 'bool_' spelling is the established layer name.
        return MaxPool2D(pool_size=[2, 2], strides=[stride, stride], padding='same',
                         name='bool_' + str(idx))(tensor)

    input_image = Input(shape=(416, 416, 3))
    final_out = (80 + 5) * 3

    # idx 0..3: doubling widths, each halving the resolution
    net = input_image
    for idx, width in enumerate((16, 32, 64, 128)):
        net = pool(conv_block(net, width, 3, idx), 2, idx)

    # idx 4: kept as the skip connection for the 26x26 head
    net = conv_block(net, 256, 3, 4)
    skip_2 = net
    net = pool(net, 2, 4)

    # idx 5: stride-1 pooling keeps the 13x13 resolution
    net = pool(conv_block(net, 512, 3, 5), 1, 5)

    # idx 6
    net = conv_block(net, 1024, 3, 6)

    # idx 7: 1x1 bottleneck shared by both heads
    net = conv_block(net, 256, 1, 7)
    skip_1 = net

    # idx 8-9: 13x13 head
    net = conv_block(net, 512, 3, 8)
    y_large = Conv2D(final_out, 1, strides=1, padding='same', name='conv_9')(net)

    # idx 10-14: 26x26 head
    net = conv_block(skip_1, 128, 1, 10)
    net = UpSampling2D(2, name='upsampling_11')(net)
    net = Concatenate(name='concatenate_12')([net, skip_2])
    net = conv_block(net, 256, 3, 13)
    y_small = Conv2D(final_out, 1, strides=1, padding='same', name='conv_14')(net)

    return tf.keras.Model(input_image, (y_large, y_small))

#**Transfer Learning**

In [None]:
def our_tiny_model(num_classes, first_time,
                   weights_path='/content/gdrive/My Drive/tiny_weights/tiny-yolo3.h5'):
    """
    Transfer-learning model: frozen tiny-YOLOv3 base plus a new detection head.

    The base from `make_tiny_yolov3_model` is frozen entirely. Its 'leaky_7'
    output feeds a new head (layers indexed 20..27) and its 'leaky_4' output is
    the skip connection for the 26x26 branch. Only the new head is trainable.

    inputs:
    num_classes: number of object classes; each head emits (5 + num_classes) * 3 channels
    first_time: when truthy, pretrained weights are loaded from `weights_path`
                into the base before it is frozen; otherwise the base keeps its
                initial weights (presumably the caller restores a checkpoint
                afterwards - verify against the training code)
    weights_path: location of the pretrained tiny-YOLOv3 weight file; defaults
                  to the Google Drive path this notebook has always used
    outputs:
    new_model: tf.keras.Model with outputs (y_small, y_large), each reshaped to
               (batch, grid, grid, 3, 5 + num_classes)
    """

    def conv_bn_leaky(tensor, n_filters, kernel_size, idx):
        # Conv2D (no bias) -> BatchNormalization -> LeakyReLU(0.1), named by idx.
        tag = str(idx)
        tensor = Conv2D(n_filters, kernel_size, strides=1, padding='same',
                        use_bias=False, name='conv_' + tag)(tensor)
        tensor = BatchNormalization(epsilon=0.001, name='bnorm_' + tag)(tensor)
        return LeakyReLU(alpha=0.1, name='leaky_' + tag)(tensor)

    final_out = (5 + num_classes) * 3
    model = make_tiny_yolov3_model()
    if first_time:
        model.load_weights(weights_path)
    for layer in model.layers:
        layer.trainable = False
    x = model.get_layer('leaky_7').output
    skip_2 = model.get_layer('leaky_4').output

    # New head. Indices start at 20 so the names cannot clash with the base (0..14).
    # idx 20: 1x1 bottleneck shared by both branches
    x = conv_bn_leaky(x, 256, 1, 20)
    skip_1 = x

    # idx 21-22: 13x13 branch
    x = conv_bn_leaky(x, 512, 3, 21)
    y_large = Conv2D(final_out, 1, strides=1, padding='same', name='conv_22')(x)

    # idx 23-27: 26x26 branch
    x = conv_bn_leaky(skip_1, 128, 1, 23)
    x = UpSampling2D(2, name='upsampling_24')(x)
    x = Concatenate(name='concatenate_25')([x, skip_2])
    x = conv_bn_leaky(x, 256, 3, 26)
    y_small = Conv2D(final_out, 1, strides=1, padding='same', name='conv_27')(x)

    # split the channel axis into (anchor, 5 + num_classes)
    y_small_shape = tf.shape(y_small)
    y_large_shape = tf.shape(y_large)
    y_small = tf.reshape(y_small, (y_small_shape[0], y_small_shape[1], y_small_shape[2], 3, -1), name='detector_reshape_small')
    y_large = tf.reshape(y_large, (y_large_shape[0], y_large_shape[1], y_large_shape[2], 3, -1), name='detector_reshape_large')
    new_model = tf.keras.Model(model.input, (y_small, y_large))
    return new_model

#**utils**

In [None]:
def xywh_to_x1y1x2y2(box):
    """
    Convert centre-format boxes to corner-format boxes.

    inputs:
    box: tensor whose last axis starts with (x, y, w, h); extra channels are dropped
    outputs:
    y_box: tensor with last axis (x1, y1, x2, y2)
    """
    centre = box[..., 0:2]
    half_size = box[..., 2:4] / 2
    top_left = centre - half_size
    bottom_right = centre + half_size
    return tf.concat([top_left, bottom_right], axis=-1)

In [None]:
def broadcast_iou(box1, box2):
    """
    Pairwise intersection-over-union between every box in box1 and every box in box2.

    inputs:
    box1: tensor of boxes with last axis (x1, y1, x2, y2), eg. (3, 4)
    box2: tensor of boxes with last axis (x1, y1, x2, y2), eg. (3, 4)
    outputs:
    iou tensor with one entry per (box1, box2) pair, eg. (3, 3)
    """
    # Insert singleton axes so the two sets pair up under broadcasting:
    # box1 (3, 4) -> (3, 1, 4), box2 (3, 4) -> (1, 3, 4)
    lhs = tf.expand_dims(box1, -2)
    rhs = tf.expand_dims(box2, 0)

    # Materialise both operands at the common shape, eg. (3, 3, 4)
    pair_shape = tf.broadcast_dynamic_shape(tf.shape(lhs), tf.shape(rhs))
    lhs = tf.broadcast_to(lhs, pair_shape)
    rhs = tf.broadcast_to(rhs, pair_shape)

    # Edges of the overlap rectangle
    left = tf.maximum(lhs[..., 0], rhs[..., 0])
    top = tf.maximum(lhs[..., 1], rhs[..., 1])
    right = tf.minimum(lhs[..., 2], rhs[..., 2])
    bottom = tf.minimum(lhs[..., 3], rhs[..., 3])

    # Disjoint boxes give a negative extent, which is clamped to zero
    overlap_w = tf.maximum(right - left, 0)
    overlap_h = tf.maximum(bottom - top, 0)
    overlap_area = overlap_w * overlap_h

    lhs_area = (lhs[..., 2] - lhs[..., 0]) * (lhs[..., 3] - lhs[..., 1])
    rhs_area = (rhs[..., 2] - rhs[..., 0]) * (rhs[..., 3] - rhs[..., 1])

    # intersection over union
    return overlap_area / (lhs_area + rhs_area - overlap_area)

In [None]:
def binary_cross_entropy(logits, labels):
    """
    Element-wise binary cross entropy.

    inputs:
    logits: predicted values, clipped into [1e-7, 1 - 1e-7] before the log is
            taken (so, despite the name, they are treated as probabilities)
    labels: target values, broadcastable against `logits`
    outputs:
    tensor of per-element losses, same shape as the broadcast inputs
    """
    eps = 1e-7
    probs = tf.clip_by_value(logits, eps, 1 - eps)
    positive_term = labels * tf.math.log(probs)
    negative_term = (1 - labels) * tf.math.log(1 - probs)
    return -(positive_term + negative_term)

In [None]:
def get_absolute_yolo_box(y_pred, valid_anchors_wh, num_classes):
    """
    Turn raw per-cell predictions into boxes expressed relative to the whole image.
    Adapted from the original C code:
    https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/yolo_layer.c#L83
    inputs:
    y_pred: prediction tensor from the model, shape (batch, grid, grid, anchor, 5 + num_classes)
    valid_anchors_wh: anchor widths/heights for this scale, broadcastable against (..., anchor, 2)
    num_classes: number of class channels at the end of the last axis
    outputs:
    y_box: (batch, grid, grid, anchor, 4) with last axis (bx, by, bw, bh);
           bx, by are centroids divided by the grid size
    objectness: sigmoid of the objectness channel
    classes: sigmoid class scores multiplied by objectness
    """
    t_xy, t_wh, obj_raw, class_raw = tf.split(
        y_pred, (2, 2, 1, num_classes), axis=-1)

    objectness = tf.sigmoid(obj_raw)
    # Pr(class_i) = Pr(class_i | object) * Pr(object):
    # every class score is scaled by the objectness score
    classes = tf.sigmoid(class_raw) * objectness

    # Cell offsets (Cx, Cy from the YOLOv3 paper). For a 3x3 grid, meshgrid yields
    #   [[0, 1, 2], [0, 1, 2], [0, 1, 2]]   and   [[0, 0, 0], [1, 1, 1], [2, 2, 2]]
    # (real grids are 13, 26, ...). Stacking on the last axis interleaves them:
    #   [[[0, 0], [1, 0], [2, 0]],
    #    [[0, 1], [1, 1], [2, 1]],
    #    [[0, 2], [1, 2], [2, 2]]]
    grid_size = tf.shape(y_pred)[1]
    cell_index = tf.range(grid_size)
    C_xy = tf.stack(tf.meshgrid(cell_index, cell_index), axis=-1)

    # A singleton anchor axis lets the offsets broadcast over all anchors: (grid, grid, 1, 2)
    C_xy = tf.expand_dims(C_xy, axis=2)

    # YoloV2 / YoloV3:  bx = sigmoid(tx) + Cx,  by = sigmoid(ty) + Cy
    # Dividing by the grid size normalises the centroids;
    # result shape is (batch, grid, grid, anchor, 2)
    cell_units_xy = tf.sigmoid(t_xy) + tf.cast(C_xy, tf.float32)
    b_xy = cell_units_xy / tf.cast(grid_size, tf.float32)

    # exp() keeps width and height positive. A box may exceed the image, but a
    # negative size would be meaningless:
    # https://github.com/pjreddie/darknet/issues/568#issuecomment-469600294
    b_wh = tf.exp(t_wh) * valid_anchors_wh

    return tf.concat([b_xy, b_wh], axis=-1), objectness, classes

In [None]:
def get_relative_yolo_box(y_true, valid_anchors_wh):
    """
    Inverse of `get_absolute_yolo_box`: map (bx, by, bw, bh) back to the
    cell-relative (tx, ty, tw, th) encoding.
    inputs:
    y_true: ground-truth tensor whose last axis starts with (bx, by, bw, bh);
            axis 1 is taken as the grid size
    valid_anchors_wh: anchor widths/heights, broadcastable against the wh channels
    outputs:
    y_box: tensor with last axis (tx, ty, tw, th)
    """
    grid_size = tf.shape(y_true)[1]
    cell_index = tf.range(grid_size)
    # (grid, grid, 1, 2) cell offsets, broadcast over the anchor axis
    C_xy = tf.stack(tf.meshgrid(cell_index, cell_index), axis=-1)
    C_xy = tf.expand_dims(C_xy, axis=2)

    # offset of the centroid inside its cell, in cell units
    t_xy = y_true[..., 0:2] * tf.cast(grid_size, tf.float32) - tf.cast(C_xy, tf.float32)

    # log-ratio of box size to anchor size; cells with zero width/height produce
    # inf or nan here, and those entries are reset to 0
    raw_t_wh = tf.math.log(y_true[..., 2:4] / valid_anchors_wh)
    is_invalid = tf.logical_or(tf.math.is_inf(raw_t_wh), tf.math.is_nan(raw_t_wh))
    t_wh = tf.where(is_invalid, tf.zeros_like(raw_t_wh), raw_t_wh)

    return tf.concat([t_xy, t_wh], axis=-1)

#**Loss Function**

In [None]:
class YoloLoss(object):
    """
    YOLO loss for a single output scale.

    Calling the object with (y_true, y_pred) returns the per-image total loss
    and a tuple with its four components (xy, wh, class, objectness).
    """

    def __init__(self, num_classes, valid_anchors_wh):
        """
        inputs:
        num_classes: number of class channels in y_true / y_pred
        valid_anchors_wh: anchor widths/heights belonging to this scale
        """
        self.num_classes = num_classes
        self.valid_anchors_wh = valid_anchors_wh
        # predictions whose best IoU reaches this value are not penalised as background
        self.ignore_thresh = 0.5
        # multiplier for the coordinate (xy, wh) losses
        self.lambda_coord = 5.0
        # multiplier for the no-object part of the objectness loss
        self.lambda_noobj = 0.5

    def __call__(self, y_true, y_pred):
        """
        calculate the loss of model prediction for one scale
        inputs:
        y_true: ground truth, last axis (bx, by, bw, bh, obj, one-hot classes)
        y_pred: raw model output in shape of (batch, grid, grid, anchor, 5 + num_classes)
        outputs:
        total loss per image, and the tuple (xy_loss, wh_loss, class_loss, obj_loss)
        """
        # Naming convention:
        #   *_rel - cell-relative (tx, ty, tw, th); used for the regression losses
        #   *_abs - image-relative (bx, by, bw, bh); used for IoU / the ignore mask

        # Prediction, relative form. xy passes through a sigmoid, wh stays raw.
        # TODO: Add comment for the sigmoid here
        pred_xy_rel = tf.sigmoid(y_pred[..., 0:2])
        pred_wh_rel = y_pred[..., 2:4]

        # Prediction, absolute form; pred_obj and pred_class are already sigmoid'ed
        # pred_box_abs: (batch, grid, grid, anchor, 4)
        # pred_obj:     (batch, grid, grid, anchor, 1)
        # pred_class:   (batch, grid, grid, anchor, num_classes)
        pred_box_abs, pred_obj, pred_class = get_absolute_yolo_box(
            y_pred, self.valid_anchors_wh, self.num_classes)
        pred_box_abs = xywh_to_x1y1x2y2(pred_box_abs)

        # Ground truth, absolute form
        # true_xy_abs / true_wh_abs: (batch, grid, grid, anchor, 2)
        # true_obj:                  (batch, grid, grid, anchor, 1)
        # true_class:                (batch, grid, grid, anchor, num_classes)
        true_xy_abs, true_wh_abs, true_obj, true_class = tf.split(
            y_true, (2, 2, 1, self.num_classes), axis=-1)
        true_box_abs = xywh_to_x1y1x2y2(
            tf.concat([true_xy_abs, true_wh_abs], axis=-1))

        # Ground truth, relative form: (batch, grid, grid, anchor, 4).
        # YoloV2: "This ground truth value can be easily computed by inverting
        # the equations above."
        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/yolo_layer.c#L93
        true_box_rel = get_relative_yolo_box(y_true, self.valid_anchors_wh)
        true_xy_rel = true_box_rel[..., 0:2]
        true_wh_rel = true_box_rel[..., 2:4]

        # Small boxes get a larger weight, see (2 - truth.w * truth.h) in
        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/yolo_layer.c#L190
        weight = 2 - true_wh_abs[..., 0] * true_wh_abs[..., 1]

        xy_loss = self.calc_xy_loss(true_obj, true_xy_rel, pred_xy_rel, weight)
        wh_loss = self.calc_wh_loss(true_obj, true_wh_rel, pred_wh_rel, weight)
        class_loss = self.calc_class_loss(true_obj, true_class, pred_class)

        # IoU and the ignore mask are computed on absolute corner boxes
        ignore_mask = self.calc_ignore_mask(true_obj, true_box_abs, pred_box_abs)
        obj_loss = self.calc_obj_loss(true_obj, pred_obj, ignore_mask)

        # YoloV1: Function (3)
        total_loss = xy_loss + wh_loss + class_loss + obj_loss
        return total_loss, (xy_loss, wh_loss, class_loss, obj_loss)

    def calc_ignore_mask(self, true_obj, true_box, pred_box):
        """
        build the mask that selects predictions to penalise as background
        inputs:
        true_obj: ground-truth objectness, eg. (1, 13, 13, 3, 1)
        true_box: ground-truth corner boxes, eg. (1, 13, 13, 3, 4)
        pred_box: predicted corner boxes, eg. (1, 13, 13, 3, 4)
        outputs:
        ignore_mask: 1.0 where the best IoU is below ignore_thresh, else 0.0,
                     eg. (1, 13, 13, 3, 1)
        """
        # (batch, grid, grid, anchor) boolean selector of cells that hold an object
        has_object = tf.cast(tf.squeeze(true_obj, axis=-1), tf.bool)

        # Keep only real ground-truth boxes, flattened to (num_true_boxes, 4).
        # NOTE(review): the batch axis is flattened as well, so boxes from every
        # image in the batch end up in the same list - confirm this is intended.
        true_box_filtered = tf.boolean_mask(true_box, has_object)

        # YOLOv3:
        # "If the bounding box prior is not the best but does overlap a ground
        # truth object by more than some threshold we ignore the prediction,
        # following [17]. We use the threshold of .5."
        # Best IoU of each predicted box against all kept ground-truth boxes.
        pairwise_iou = broadcast_iou(pred_box, true_box_filtered)
        best_iou = tf.reduce_max(pairwise_iou, axis=-1)

        # A prediction that overlaps some ground truth well enough gets mask 0
        # and therefore contributes nothing to the no-object loss.
        below_threshold = tf.cast(best_iou < self.ignore_thresh, tf.float32)
        return tf.expand_dims(below_threshold, axis=-1)

    def calc_obj_loss(self, true_obj, pred_obj, ignore_mask):
        """
        calculate loss of objectness: binary cross entropy, split into object
        and no-object parts
        inputs:
        true_obj: objectness from ground truth in shape of (batch, grid, grid, anchor, 1)
        pred_obj: objectness from model prediction in shape of (batch, grid, grid, anchor, 1)
        ignore_mask: mask from `calc_ignore_mask`, applied to the no-object part only
        outputs:
        obj_loss: objectness loss per image
        """
        entropy = binary_cross_entropy(pred_obj, true_obj)

        object_part = tf.reduce_sum(true_obj * entropy, axis=(1, 2, 3, 4))
        background_part = tf.reduce_sum(
            (1 - true_obj) * entropy * ignore_mask, axis=(1, 2, 3, 4))

        return object_part + background_part * self.lambda_noobj

    def calc_class_loss(self, true_obj, true_class, pred_class):
        """
        calculate loss of class prediction
        inputs:
        true_obj: if the object present from ground truth in shape of (batch, grid, grid, anchor, 1)
        true_class: one-hot class from ground truth in shape of (batch, grid, grid, anchor, num_classes)
        pred_class: class scores from model prediction in shape of (batch, grid, grid, anchor, num_classes)
        outputs:
        class_loss: class loss per image
        """
        # Yolov1:
        # "Note that the loss function only penalizes classification error
        # if an object is present in that grid cell (hence the conditional
        # class probability discussed earlier)."
        per_class = true_obj * binary_cross_entropy(pred_class, true_class)
        return tf.reduce_sum(per_class, axis=(1, 2, 3, 4))

    def _weighted_square_error(self, true_obj, target, prediction, weight):
        # Shared by the xy and wh losses: squared error summed over the last
        # axis, kept only where an object exists, weighted, summed per image
        # and scaled by lambda_coord.
        squared_error = tf.reduce_sum(tf.square(target - prediction), axis=-1)
        # drop the trailing axis so the mask lines up with (batch, grid, grid, anchor)
        object_mask = tf.squeeze(true_obj, axis=-1)
        masked = object_mask * squared_error * weight
        return tf.reduce_sum(masked, axis=(1, 2, 3)) * self.lambda_coord

    def calc_xy_loss(self, true_obj, true_xy, pred_xy, weight):
        """
        calculate loss of the centroid coordinate: sum of L2 distances
        inputs:
        true_obj: if the object present from ground truth in shape of (batch, grid, grid, anchor, 1)
        true_xy: centroid x and y from ground truth in shape of (batch, grid, grid, anchor, 2)
        pred_xy: centroid x and y from model prediction in shape of (batch, grid, grid, anchor, 2)
        weight: weight adjustment, reward smaller bounding box
        outputs:
        xy_loss: centroid loss per image
        """
        # YoloV1:
        # "It also only penalizes bounding box coordinate error if that
        # predictor is "responsible" for the ground truth box (i.e. has the
        # highest IOU of any predictor in that grid cell)."
        return self._weighted_square_error(true_obj, true_xy, pred_xy, weight)

    def calc_wh_loss(self, true_obj, true_wh, pred_wh, weight):
        """
        calculate loss of the width and height: sum of L2 distances
        inputs:
        true_obj: if the object present from ground truth in shape of (batch, grid, grid, anchor, 1)
        true_wh: width and height from ground truth in shape of (batch, grid, grid, anchor, 2)
        pred_wh: width and height from model prediction in shape of (batch, grid, grid, anchor, 2)
        weight: weight adjustment, reward smaller bounding box
        outputs:
        wh_loss: width and height loss per image
        """
        return self._weighted_square_error(true_obj, true_wh, pred_wh, weight)

In [None]:
class Preprocessor(object):
    def __init__(self, is_train, num_classes, output_shape=(416, 416)):
        self.is_train = is_train
        self.num_classes = num_classes
        self.output_shape = output_shape

    def __call__(self, example):
        """
        turn one serialized tf.Example into an (image, label) training pair
        inputs:
        example: a serialized tf.Example with the features listed in parse_tfexample
        outputs:
        image: float32 image resized to self.output_shape and scaled to (-1, 1)
        label: tuple of two label tensors, first for the 26x26 grid (anchors 0-2),
               second for the 13x13 grid (anchors 3-5)
        """
        features = self.parse_tfexample(example)
        #tf.print(features['image/filename'])
        # channels=3 forces an RGB result, so a grayscale JPEG still produces
        # an image that can be batched with the colour ones
        image = tf.io.decode_jpeg(features['image/encoded'], channels=3)
        image = tf.cast(image, tf.float32)

        classes, bboxes = self.parse_y_features(features)

        # random augmentation is only meaningful for training; validation
        # examples are passed through unchanged so the validation loss is
        # measured on the real data
        if self.is_train:
            image = self.change_intensity(image)
            image = self.change_contrast(image)
            #image = self.add_noise_to_image(image)
            image, bboxes = self.random_flip_image_and_label(image, bboxes)
            image, bboxes = self.random_crop_image_and_label(image, bboxes)

        image = tf.image.resize(image, self.output_shape)
        # 127.5 = 255/2 , map to (-1,1) range
        image = tf.cast(image, tf.float32) / 127.5 - 1

        label = (
            self.preprocess_label_for_one_scale(classes, bboxes, 26,
                                                np.array([0, 1, 2])),
            self.preprocess_label_for_one_scale(classes, bboxes, 13,
                                                np.array([3, 4, 5])),
        )
        return image, label

    def change_intensity(self, image):
        """
        random brightness shift: add one integer offset to every pixel and
        clamp the result to [0, 255]. The returned image is int32.
        """
        pixels = tf.cast(image, tf.int32)
        # a float drawn from [-50, 50) and then cast to int32
        offset = tf.cast(
            tf.random.uniform(shape=[1], minval=-50, maxval=50), tf.int32)
        shifted = pixels + offset
        capped = tf.math.minimum(shifted, 255)
        return tf.math.maximum(capped, 0)  ##adjust brightness
    
    def change_contrast(self, image):
        """
        random contrast change: the contrast factor is drawn uniformly
        between 0.7 and 1.0 and applied with tf.image.adjust_contrast
        """
        factor = tf.cast(
            tf.random.uniform(shape=[1], minval=0.7, maxval=1.0), tf.float32)
        return tf.image.adjust_contrast(image, factor[0])  ## adjust contrast

    def add_noise_to_image(self, image):
        """
        when a uniform draw falls below 0.3, add zero-mean gaussian noise to
        the image, clamp to [0, 255] and cast to int32; otherwise the image
        is returned untouched
        """
        draw = tf.random.uniform([1])
        if draw < 0.3:
            pixels = tf.cast(image, tf.float32)
            # sigma is 100 * draw, and draw < 0.3 here, so max sigma = 30
            noise = tf.random.normal(tf.shape(pixels), 0, 100*draw)
            noisy = tf.math.minimum(pixels + noise, 255)
            noisy = tf.math.maximum(noisy, 0)
            image = tf.cast(noisy, tf.int32)
        return image

    def random_flip_image_and_label(self, image, bboxes):
        """
        mirror the image left-to-right when a uniform draw is below 0.5 and
        update the boxes (xmin, ymin, xmax, ymax) to match
        """
        coin = tf.random.uniform([1])
        if coin < 0.5:
            image = tf.image.flip_left_right(image)
            left, top, right, bottom = tf.split(bboxes, [1, 1, 1, 1], -1)
            # mirroring swaps the two x edges: the old right edge becomes the
            # new left edge and the old left edge becomes the new right edge
            mirrored_left = 1 - right
            mirrored_right = 1 - left
            columns = tf.stack(
                [mirrored_left, top, mirrored_right, bottom], axis=1)
            bboxes = tf.squeeze(columns, axis=-1)

        return image, bboxes

    def get_random_crop_delta(self, bboxes):
        """
        draw random crop margins that keep every bounding box of the image.
        The smallest xmin/ymin and the largest xmax/ymax over all boxes give
        the area that must survive; each margin is drawn uniformly between 0
        and the distance from that area to the matching image border.
        outputs:
        (xmin_delta, ymin_delta, xmax_delta, ymax_delta), each in shape of [1]
        """
        hull_left = tf.math.reduce_min(bboxes[..., 0])
        hull_top = tf.math.reduce_min(bboxes[..., 1])
        hull_right = tf.math.reduce_max(bboxes[..., 2])
        hull_bottom = tf.math.reduce_max(bboxes[..., 3])

        # each delta is the normalized margin cut away between the image
        # border and the crop border
        # ____________________________________
        # |         ________________         |
        # |image    |crop ______   |         |
        # |<-DELTA->|     |bbox|   |<-DELTA->|
        # |         |     |____|   |         |
        # |         |______________|         |
        # |__________________________________|
        left_cut = tf.random.uniform([1], 0, hull_left)
        top_cut = tf.random.uniform([1], 0, hull_top)
        right_cut = tf.random.uniform([1], 0, 1 - hull_right)
        bottom_cut = tf.random.uniform([1], 0, 1 - hull_bottom)

        return left_cut, top_cut, right_cut, bottom_cut

    def random_crop_image_and_label(self, image, bboxes):
        """
        crop the image when a uniform draw is below 0.5. The margins come from
        get_random_crop_delta, and the boxes are re-normalized to the cropped
        image.
        """
        coin = tf.random.uniform([1])
        if coin < 0.5:
            left_cut, top_cut, right_cut, bottom_cut = self.get_random_crop_delta(
                bboxes)

            xmin, ymin, xmax, ymax = tf.split(bboxes, [1, 1, 1, 1], -1)

            # fraction of the original width / height that survives the crop
            kept_width = 1 - left_cut - right_cut
            kept_height = 1 - top_cut - bottom_cut

            # before crop: |_0.1_|_0.1_|____________0.5___________|_0.1_|___0.2___|
            # after crop:  |_0.1_|____________0.5___________|_0.1_|
            # with old xmin 0.2, old xmax 0.8, left cut 0.1 and right cut 0.2:
            # new xmin = (0.2 - 0.1) / (1 - 0.1 - 0.2) = 1/7
            # new xmax = (0.8 - 0.1) / (1 - 0.1 - 0.2) = 6/7
            # y is handled the same way
            xmin = (xmin - left_cut) / kept_width
            ymin = (ymin - top_cut) / kept_height
            xmax = (xmax - left_cut) / kept_width
            ymax = (ymax - top_cut) / kept_height

            bboxes = tf.squeeze(
                tf.stack([xmin, ymin, xmax, ymax], axis=1), axis=-1)

            # translate the normalized margins into a pixel window
            image_height = tf.cast(tf.shape(image)[0], dtype=tf.float32)
            image_width = tf.cast(tf.shape(image)[1], dtype=tf.float32)

            row_start = tf.cast(top_cut[0] * image_height, dtype=tf.int32)
            col_start = tf.cast(left_cut[0] * image_width, dtype=tf.int32)
            num_rows = tf.cast(
                tf.math.ceil((1 - bottom_cut - top_cut)[0] * image_height),
                dtype=tf.int32)
            num_cols = tf.cast(
                tf.math.ceil((1 - right_cut - left_cut)[0] * image_width),
                dtype=tf.int32)

            row_end = row_start + num_rows
            col_end = col_start + num_cols
            image = image[row_start:row_end, col_start:col_end, :]
        return image, bboxes

    def parse_y_features(self, features):
        """
        densify the sparse annotation features of one example
        inputs:
        features: parsed example as returned by parse_tfexample
        outputs:
        classes: one-hot class labels with depth self.num_classes
        bboxes: boxes as (xmin, ymin, xmax, ymax), shape (None, 4)
        """
        label_ids = tf.sparse.to_dense(features['image/object/class/label'])
        classes = tf.one_hot(label_ids, self.num_classes)

        # one dense vector per box edge, stacked as columns
        edge_keys = (
            'image/object/bbox/xmin',
            'image/object/bbox/ymin',
            'image/object/bbox/xmax',
            'image/object/bbox/ymax',
        )
        edges = [tf.sparse.to_dense(features[key]) for key in edge_keys]
        bboxes = tf.stack(edges, axis=1)
        return classes, bboxes

    def preprocess_label_for_one_scale(self,
                                       classes,
                                       bboxes,
                                       grid_size=13,
                                       valid_anchors=None):
        """
        preprocess the class and bounding boxes annotations into model desired format for one scale
        (grid, grid, anchor, (centroid x, centroid y, width, height, objectness, ...one-hot classes...))
        inputs:
        classes: one-hot class rows, one per ground truth box
        bboxes: ground truth boxes as (xmin, ymin, xmax, ymax), one row per box
        grid_size: a scalar grid size to use
        valid_anchors: indices of the anchors that belong to this scale; __call__ passes
            np.array([0, 1, 2]) for grid 26 and np.array([3, 4, 5]) for grid 13
        outputs:
        y: the desired label format to calculate loss, in shape of
            (grid_size, grid_size, 3, 5 + num_classes)
        """
        # construct an empty placeholder for the final output y first
        y = tf.zeros((grid_size, grid_size, 3, 5 + self.num_classes))

        # find the best anchor indices for each ground truth box
        anchor_indices = self.find_best_anchor(bboxes)

        # necessary assertion, otherwise the steps later would fail
        # NOTE(review): these compare static `.shape[0]` values, which the comment
        # below says can be None here - confirm the assertions actually check anything
        tf.Assert(classes.shape[0] == bboxes.shape[0], [classes])
        tf.Assert(anchor_indices.shape[0] == bboxes.shape[0], [anchor_indices])

        # this has to be tf.shape instead of classes.shape, otherwise would be None
        num_boxes = tf.shape(classes)[0]

        # collected scatter locations and values, one entry per box kept for this scale
        # NOTE(review): both arrays start with size 1; when no box matches this scale
        # nothing is written before stack() - confirm that case is harmless
        indices = tf.TensorArray(tf.int32, 1, dynamic_size=True)
        updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)

        # next free slot in `indices` / `updates`
        valid_count = 0
        for i in tf.range(num_boxes):
            curr_class = tf.cast(classes[i], tf.float32)
            curr_box = bboxes[i]
            curr_anchor = anchor_indices[i]

            # only use the anchor when it belongs to current scale (grid_size)
            # with the two scales built in __call__, grid size 26 accepts anchors
            # 0, 1, 2 and grid size 13 accepts anchors 3, 4, 5
            # NOTE(review): the default valid_anchors=None is never used by callers
            # in this class; presumably a real array is always required
            anchor_found = tf.reduce_any(curr_anchor == valid_anchors)
            if anchor_found:
                # now that we found the anchor, we need to set it in our final output y
                # we only have three anchor slots in y, so we need to mod by 3 first to get
                # adjusted index. eg. anchor 4 will have index 1
                adjusted_anchor_index = tf.math.floormod(curr_anchor, 3)

                # we need to turn (xmin, ymin, xmax, ymax) box format into
                # (centroid x, centroid y, width, height) to be able to
                # calculate yolo loss later
                curr_box_xy = (curr_box[..., 0:2] + curr_box[..., 2:4]) / 2
                curr_box_wh = curr_box[..., 2:4] - curr_box[..., 0:2]

                # calculate which grid cell should we use
                # eg. when curr_box_xy = [0.25, 0.25], and grid size = 26, which is a quarter of the image
                # the index of grid cell is floor(0.25 * 26) = 6
                grid_cell_xy = tf.cast(
                    curr_box_xy // tf.cast((1 / grid_size), dtype=tf.float32),
                    tf.int32)

                # for this box, we need to update y at location (row, column, adjusted_anchor_index)
                # grid[y][x][anchor] = (x, y, w, h, obj, class)
                # note that it's not grid[x][y]
                index = tf.stack(
                    [grid_cell_xy[1], grid_cell_xy[0], adjusted_anchor_index])

                # this is the value we use to update the above location:
                # (x, y, w, h, objectness = 1.0, one-hot classes), 5 + num_classes values
                # note that we need to make this one-hot classes in order to use categorical crossentropy later
                update = tf.concat(
                    values=[
                        curr_box_xy, curr_box_wh,
                        tf.constant([1.0]), curr_class
                    ],
                    axis=0)
                # add to final indices and updates to be written into y
                indices = indices.write(valid_count, index)
                updates = updates.write(valid_count, update)
                # tf.print(indices.stack())
                # tf.print(updates.stack())
                valid_count = 1 + valid_count

        # write every collected update into the zero-initialized label tensor
        y = tf.tensor_scatter_nd_update(y, indices.stack(), updates.stack())
        return y

    def find_best_anchor(self, y_box):
        """
        pick, for every ground truth box in y_box, the anchor from the
        module-level anchors_wh whose shape fits it best. The fit is an IoU
        computed from widths and heights only, ie. as if box and anchor
        shared the same centroid.
        inputs:
        y_box: ground truth boxes in shape of (num_boxes, 4)
        outputs:
        anchor_idx: int32 anchor indices in shape of (num_boxes)
        """
        anchor_w = anchors_wh[..., 0]
        anchor_h = anchors_wh[..., 1]

        # (num_boxes, 2) -> (num_boxes, num_anchors, 2): every box is repeated
        # once per anchor so the element-wise minimum below lines up
        sizes = y_box[..., 2:4] - y_box[..., 0:2]
        sizes = tf.tile(
            tf.expand_dims(sizes, -2), (1, tf.shape(anchors_wh)[0], 1))
        box_w = sizes[..., 0]
        box_h = sizes[..., 1]

        # with a shared centroid the intersection is simply
        # min(width) * min(height), shape (num_boxes, num_anchors)
        overlap = tf.minimum(box_w, anchor_w) * tf.minimum(box_h, anchor_h)

        # union = box area + anchor area - intersection
        union = box_w * box_h + anchor_w * anchor_h - overlap

        # index of the anchor with the highest iou, one per box
        return tf.cast(tf.argmax(overlap / union, axis=-1), tf.int32)

    def parse_tfexample(self, example_proto):
        """
        parse one serialized tf.Example into a dict of features
        inputs:
        example_proto: the serialized example
        outputs:
        dict keyed by feature name; variable-length features are sparse tensors
        """
        int_scalar = tf.io.FixedLenFeature([], tf.int64)
        bytes_scalar = tf.io.FixedLenFeature([], tf.string)
        int_list = tf.io.VarLenFeature(tf.int64)
        float_list = tf.io.VarLenFeature(tf.float32)

        schema = {
            # image metadata
            'image/height': int_scalar,
            'image/width': int_scalar,
            'image/depth': int_scalar,
            # per-object annotations, one entry per box
            'image/object/class/label': int_list,
            'image/object/bbox/xmin': float_list,
            'image/object/bbox/ymin': float_list,
            'image/object/bbox/xmax': float_list,
            'image/object/bbox/ymax': float_list,
            # raw image bytes and source file name
            'image/encoded': bytes_scalar,
            'image/filename': bytes_scalar,
        }
        return tf.io.parse_single_example(example_proto, schema)

# **Training**

In [None]:
class Trainer(object):
    def __init__(self,
                 model,
                 initial_epoch,
                 epochs,
                 global_batch_size,
                 strategy,
                 initial_learning_rate=0.01):
        """
        set up losses, optimizer and learning rate schedule state
        inputs:
        model: the model to train; called with a batch of images
        initial_epoch: first epoch number of the training loop
        epochs: last epoch number of the training loop (inclusive)
        global_batch_size: batch size the summed losses are divided by
        strategy: tf.distribute strategy used to run the steps
        initial_learning_rate: starting learning rate of the Adam optimizer
        """
        self.model = model
        self.strategy = strategy
        self.initial_epoch = initial_epoch
        self.epochs = epochs
        self.global_batch_size = global_batch_size

        # one loss per model output: anchors 0-2 for the small scale 26x26,
        # anchors 3-5 for the large scale 13x13
        self.loss_objects = [
            YoloLoss(
                num_classes=TOTAL_CLASSES,
                valid_anchors_wh=anchors_wh[first:first + 3])
            for first in (0, 3)
        ]
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=initial_learning_rate)

        # state of the learning rate schedule, see lr_decay
        self.current_learning_rate = initial_learning_rate
        self.max_patience = 10 ##
        self.patience_count = 0

        # loss bookkeeping, updated at the end of every epoch in run
        self.lowest_val_loss = math.inf
        self.last_val_loss = math.inf
        self.lowest_train_loss = math.inf
        self.last_train_loss = math.inf

    def lr_decay(self):
        """
        This effectively simulate ReduceOnPlateau learning rate schedule. Learning rate
        will be reduced by a factor of 10 if there's no improvement over [max_patience] epochs
        """
        if self.patience_count == self.max_patience:
            self.current_learning_rate /= 10.0
            self.patience_count = 0
        elif self.last_val_loss == self.lowest_val_loss:
            self.patience_count = 0
        self.patience_count += 1

        self.optimizer.learning_rate = self.current_learning_rate

    def train_step(self, inputs):
        """
        run one optimization step on one batch
        inputs:
        inputs: (images, labels) where labels holds one label tensor per model output
        outputs:
        total_loss: sum over all scales of the loss divided by the global batch size
        (total_xy_loss, total_wh_loss, total_class_loss, total_obj_loss):
            the same sum for each component of the loss
        """
        images, labels = inputs
        batch_scale = 1. / self.global_batch_size

        with tf.GradientTape() as tape:
            outputs = self.model(images, training=True)

            # per-scale losses, collected per component
            scaled = {'total': [], 'xy': [], 'wh': [], 'class': [], 'obj': []}
            # iterate over the scales: one loss object per model output
            for loss_object, y_pred, y_true in zip(self.loss_objects, outputs,
                                                   labels):
                scale_total, breakdown = loss_object(y_true, y_pred)
                scale_xy, scale_wh, scale_class, scale_obj = breakdown
                scaled['total'].append(scale_total * batch_scale)
                scaled['xy'].append(scale_xy * batch_scale)
                scaled['wh'].append(scale_wh * batch_scale)
                scaled['class'].append(scale_class * batch_scale)
                scaled['obj'].append(scale_obj * batch_scale)

            total_loss = tf.reduce_sum(scaled['total'])
            total_xy_loss = tf.reduce_sum(scaled['xy'])
            total_wh_loss = tf.reduce_sum(scaled['wh'])
            total_class_loss = tf.reduce_sum(scaled['class'])
            total_obj_loss = tf.reduce_sum(scaled['obj'])

        trainable = self.model.trainable_variables
        grads = tape.gradient(target=total_loss, sources=trainable)
        self.optimizer.apply_gradients(zip(grads, trainable))

        breakdown_totals = (total_xy_loss, total_wh_loss, total_class_loss,
                            total_obj_loss)
        return total_loss, breakdown_totals

    def val_step(self, inputs):
        """
        compute the loss of one validation batch without updating the model
        inputs:
        inputs: (images, labels) where labels holds one label tensor per model output
        outputs:
        total_loss: sum over all scales of the loss divided by the global batch size
        """
        images, labels = inputs
        batch_scale = 1. / self.global_batch_size

        outputs = self.model(images, training=False)

        # one scaled loss per scale; the per-component breakdown is not needed here
        scaled_losses = []
        for loss_object, y_pred, y_true in zip(self.loss_objects, outputs,
                                               labels):
            scale_loss, _ = loss_object(y_true, y_pred)
            scaled_losses.append(scale_loss * batch_scale)

        return tf.reduce_sum(scaled_losses)

    def get_current_time(self):
        return datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    def run(self, train_dist_dataset, val_dist_dataset):
        """
        train and validate for every epoch from self.initial_epoch to self.epochs (inclusive)

        Each epoch: update the learning rate with lr_decay, run one pass over the
        training data and one over the validation data, append one line of mean
        losses to loss.txt, write summary scalars, and save the model when a new
        lowest validation loss below 70 is reached and on every 10th epoch. The
        model is saved once more after the last epoch.
        inputs:
        train_dist_dataset: distributed dataset of training batches
        val_dist_dataset: distributed dataset of validation batches
        outputs:
        (train_history, xy_history, wh_history, class_history, obj_history, val_history):
            lists holding one mean-per-batch loss value per epoch
        """
        total_steps = tf.constant(0, dtype=tf.int64)

        # `Strategy.run` is the current name of `experimental_run_v2`; use it when
        # it exists and fall back so older TensorFlow releases keep working
        run_on_replicas = getattr(self.strategy, 'run', None)
        if run_on_replicas is None:
            run_on_replicas = self.strategy.experimental_run_v2

        @tf.function
        def distributed_train_epoch(dataset, train_summary_writer,
                                    total_steps):
            # running sums over all batches of this epoch
            total_loss = 0.0
            xy_hist = 0.0
            wh_hist = 0.0
            classes_hist = 0.0
            obj_hist = 0.0
            num_train_batches = tf.constant(0, dtype=tf.int64)
            for one_batch in dataset:
                per_replica_losses, per_replica_losses_breakdown = run_on_replicas(
                    self.train_step, args=(one_batch, ))
                per_replica_xy_losses, per_replica_wh_losses, per_replica_class_losses, per_replica_obj_losses = per_replica_losses_breakdown
                # add up the values returned by the replicas
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
                batch_xy_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM,
                    per_replica_xy_losses,
                    axis=None)
                batch_wh_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM,
                    per_replica_wh_losses,
                    axis=None)
                batch_class_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM,
                    per_replica_class_losses,
                    axis=None)
                batch_obj_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM,
                    per_replica_obj_losses,
                    axis=None)
                total_loss += batch_loss
                xy_hist += batch_xy_loss
                wh_hist += batch_wh_loss
                classes_hist += batch_class_loss
                obj_hist += batch_obj_loss
                num_train_batches += 1
                tf.print('Trained batch:', num_train_batches, 'batch loss:',
                         batch_loss, 'batch xy loss', batch_xy_loss,
                         'batch wh loss', batch_wh_loss, 'batch obj loss',
                         batch_obj_loss, 'batch_class_loss', batch_class_loss,
                         'epoch total loss:', total_loss)
                with train_summary_writer.as_default():
                    # the summary step counts batches across all epochs
                    tf.summary.scalar(
                        'batch train loss',
                        batch_loss,
                        step=total_steps + num_train_batches)
                    tf.summary.scalar(
                        'batch xy loss',
                        batch_xy_loss,
                        step=total_steps + num_train_batches)
                    tf.summary.scalar(
                        'batch wh loss',
                        batch_wh_loss,
                        step=total_steps + num_train_batches)
                    tf.summary.scalar(
                        'batch obj loss',
                        batch_obj_loss,
                        step=total_steps + num_train_batches)
                    tf.summary.scalar(
                        'batch class loss',
                        batch_class_loss,
                        step=total_steps + num_train_batches)
            return total_loss, num_train_batches, (xy_hist,wh_hist,classes_hist,obj_hist)

        @tf.function
        def distributed_val_epoch(dataset):
            total_loss = 0.0
            num_val_batches = tf.constant(0, dtype=tf.int64)
            for one_batch in dataset:
                per_replica_losses = run_on_replicas(
                    self.val_step, args=(one_batch, ))
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
                total_loss += batch_loss
                num_val_batches += 1
            return total_loss, num_val_batches

        current_time = self.get_current_time()
        train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
        val_log_dir = 'logs/gradient_tape/' + current_time + '/val'
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        val_summary_writer = tf.summary.create_file_writer(val_log_dir)

        tf.print('{} Start training...'.format(current_time))
        train_history = []
        xy_history = []
        wh_history = []
        obj_history = []
        class_history = []
        val_history = []
        # the context manager closes the loss log even when training is
        # interrupted or raises
        with open('/content/gdrive/My Drive/loss.txt','a+') as loss_file:
            for epoch in range(self.initial_epoch, self.epochs + 1):
                t0 = time.time()
                self.lr_decay()

                tf.print(
                    '{} Started epoch {} with learning rate {}. Current LR patience count is {} epochs. Last lowest train loss is {}. Last lowest val loss is {}.'
                    .format(self.get_current_time(), epoch,
                            self.current_learning_rate, self.patience_count,
                            self.lowest_train_loss , self.lowest_val_loss))
                train_total_loss, num_train_batches, (xyLOSS, whLOSS, classLOSS, objLOSS) = distributed_train_epoch(
                    train_dist_dataset, train_summary_writer, total_steps)
                t1 = time.time()
                # epoch sums -> mean per batch
                train_loss = train_total_loss / tf.cast(
                    num_train_batches, dtype=tf.float32)
                xy_loss_hist = xyLOSS / tf.cast(
                    num_train_batches, dtype=tf.float32)
                wh_loss_hist = whLOSS / tf.cast(
                    num_train_batches, dtype=tf.float32)
                class_loss_hist = classLOSS / tf.cast(
                    num_train_batches, dtype=tf.float32)
                obj_loss_hist = objLOSS / tf.cast(
                    num_train_batches, dtype=tf.float32)
                train_history.append(train_loss)
                xy_history.append(xy_loss_hist)
                wh_history.append(wh_loss_hist)
                class_history.append(class_loss_hist)
                obj_history.append(obj_loss_hist)
                tf.print(
                    '{} Epoch {} train loss {}, total train batches {}, {} examples per second'
                    .format(
                        self.get_current_time(), epoch, train_loss,
                        num_train_batches,
                        tf.cast(num_train_batches, dtype=tf.float32) *
                        self.global_batch_size / (t1 - t0)))
                with train_summary_writer.as_default():
                    tf.summary.scalar('epoch train loss', train_loss, step=epoch)
                total_steps += num_train_batches

                val_total_loss, num_val_batches = distributed_val_epoch(
                    val_dist_dataset)

                t2 = time.time()
                val_loss = val_total_loss / tf.cast(
                    num_val_batches, dtype=tf.float32)
                val_history.append(val_loss)
                loss_line = "%d , %f , %f , %f , %f , %f , %f\r\n"%(epoch,xy_loss_hist,wh_loss_hist,
                                                           class_loss_hist,obj_loss_hist,
                                                           train_loss,val_loss)
                loss_file.write(loss_line)
                # flush so the line is handed to the OS right away instead of
                # sitting in the buffer until the file is closed
                loss_file.flush()
                tf.print(loss_line)
                tf.print(
                    '{} Epoch {} val loss {}, total val batches {}, {} examples per second'
                    .format(
                        self.get_current_time(), epoch, val_loss, num_val_batches,
                        tf.cast(num_val_batches, dtype=tf.float32) *
                        self.global_batch_size / (t2 - t1)))
                with val_summary_writer.as_default():
                    tf.summary.scalar('epoch val loss', val_loss, step=epoch)

                ##
                if train_loss < self.lowest_train_loss:
                    self.lowest_train_loss = train_loss
                self.last_train_loss = train_loss
                # save model when reach a new lowest validation loss
                if val_loss < self.lowest_val_loss:
                    self.lowest_val_loss = val_loss
                    ##
                    if val_loss < 70.0: # save when loss is less than 70
                        self.save_model(epoch, val_loss)
                self.last_val_loss = val_loss

                ##
                if (epoch % 10 == 0): ## save model after every 10 epochs
                    self.save_model(epoch, val_loss)

        self.save_model(self.epochs, self.last_val_loss)
        print('{} Finished.'.format(self.get_current_time()))
        return train_history, xy_history, wh_history, class_history, obj_history, val_history


    def save_model(self, epoch, loss):
        """
        save the whole model to an .h5 file whose name contains the epoch and the loss
        inputs:
        epoch: epoch number used in the file name
        loss: loss value used in the file name, written with 3 decimals
        """
        # https://github.com/tensorflow/tensorflow/issues/33565
        #model_name = './models/model-v1.0.1-epoch-{}-loss-{:.4f}.tf'.format(epoch, loss)
        # float() lets the loss be formatted the same way whether it arrives as
        # a Python number or as a scalar tensor
        model_name = '/content/gdrive/My Drive/results/epoch-{}-loss-{:.3f}.h5'.format(epoch, float(loss))
        # create the target folder on first use so saving does not fail when it
        # does not exist yet
        os.makedirs(os.path.dirname(model_name), exist_ok=True)
        #self.model.save_weights(model_name)
        self.model.save(model_name)
        print("Model {} saved.".format(model_name))

In [None]:
def create_dataset(tfrecords, batch_size, is_train):
    """
    build the input pipeline for one split
    inputs:
    tfrecords: file pattern of the TFRecord files to read
    batch_size: number of examples per batch
    is_train: when true the preprocessed examples are shuffled; also passed to Preprocessor
    outputs:
    dataset: batched and prefetched dataset of (image, label) pairs
    """
    autotune = tf.data.experimental.AUTOTUNE
    to_image_and_label = Preprocessor(is_train, TOTAL_CLASSES, OUTPUT_SHAPE)

    record_files = tf.data.Dataset.list_files(tfrecords)
    examples = tf.data.TFRecordDataset(record_files).map(
        to_image_and_label, num_parallel_calls=autotune)

    if is_train:
        # shuffle with a buffer of 512 preprocessed examples
        examples = examples.shuffle(512)

    return examples.batch(batch_size).prefetch(buffer_size=autotune)

In [None]:
# predefined anchors as (width, height), divided by 416 so they are
# fractions of a 416-pixel image side
anchors_wh = np.array(
    [
        [10, 14],
        [23, 27],
        [37, 58],
        [81, 82],
        [135, 169],
        [344, 319],
    ],
    dtype=np.float32,
) / 416

In [None]:
# --- data ---
TF_RECORDS = '/content/gdrive/My Drive/tfrecords_2014'  # folder holding the train*/val* TFRecord files
OUTPUT_SHAPE = (416, 416)  # size every image is resized to
TOTAL_CLASSES = 6

# --- training ---
BATCH_SIZE = 64  # per replica; multiplied by the replica count below
TOTAL_EPOCHS = 10
start_LR = 0.01 # starting learning rate

# --- resuming ---
# set first_time to False to load pre_weights before training
# NOTE(review): save_model writes to a 'results' folder while this path uses
# 'result' - confirm the folder name before resuming
first_time = True
pre_weights = '/content/gdrive/My Drive/result/epoch-20-loss-22.862.h5'

tf.random.set_seed(1)

In [None]:
# Every replica processes BATCH_SIZE examples, so the datasets are batched
# with the global batch size.
strategy = tf.distribute.MirroredStrategy()
global_batch_size = strategy.num_replicas_in_sync * BATCH_SIZE

# alternative layout with one sub-folder per split:
#   '{}/train/train*' and '{}/val/val*'
train_dataset = create_dataset(
    '{}/train*'.format(TF_RECORDS), global_batch_size, is_train=True)
val_dataset = create_dataset(
    '{}/val*'.format(TF_RECORDS), global_batch_size, is_train=False)

if not os.path.exists('./models'):
    os.makedirs('./models/')

with strategy.scope():
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
    val_dist_dataset = strategy.experimental_distribute_dataset(val_dataset)

    #tiny_model = TL_tiny_model(TOTAL_CLASSES)
    tiny_model = our_tiny_model(TOTAL_CLASSES, first_time)
    #model.summary()
    if not first_time:
        # resume from previously saved weights
        tiny_model.load_weights(pre_weights)

    initial_epoch = 1
    trainer = Trainer(
        model=tiny_model,
        initial_epoch=initial_epoch,
        epochs=TOTAL_EPOCHS,
        global_batch_size=global_batch_size,
        strategy=strategy,
        initial_learning_rate=start_LR,
    )

    (train_history, xy_history, wh_history,
     class_history, obj_history, val_history) = trainer.run(
         train_dist_dataset, val_dist_dataset)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
20201008-153737 Start training...
20201008-153737 Started epoch 1 with learning rate 0.01. Current LR patience count is 1 epochs. Last lowest train loss is inf. Last lowest val loss is inf.
Trained batch: 1 batch loss: 919.356812 batch xy loss 11.7694225 batch wh loss 139.037415 batch obj loss 753.231628 batch_class_loss 15.3183699 epoch total loss: 919.356812
Trained batch: 2 batch loss: 14724.9824 batch xy loss 13.2165489 batch wh loss 13541.1143 batch obj loss 1155.82227 batch_class_loss 14.8286819 epoch total loss: 15644.3389
Trained batch: 3 batch loss: 1617.07422 batch xy loss 9.59143066 batch wh loss 905.828369 batch obj loss 687.465393 batch_class_loss 14.1890011 epoch total loss: 17261.4141
Trained batch: 4 batch loss: 784.943176 batch xy loss 7.87267303 batch wh loss 355.545227 batch obj loss 406.75412 batch_class_loss 14.7711515 epoch total loss: 18046.3574
Trained batch: 5 