<a href="https://colab.research.google.com/github/D10752002/dsdayolo_implementation_1/blob/main/dsdayolo1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# import required libraries
import io
import os

from collections import defaultdict
import tensorflow as tf
import numpy as np
from keras import backend as K
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, MaxPool2D
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers import Activation

print("TensorFlow version is: {}".format(tf.__version__))
print("Eager execution is: {}".format(tf.executing_eagerly()))
print("Keras version is: {}".format(tf.keras.__version__))

TensorFlow version is: 2.8.2
Eager execution is: True
Keras version is: 2.8.0


In [3]:
path='/content/drive/MyDrive'
os.chdir(path)

In [4]:
class BatchNormalization(tf.keras.layers.BatchNormalization):
    """
    "Frozen state" and "inference mode" are two separate concepts.
    `layer.trainable = False` is to freeze the layer, so the layer will use
    stored moving `var` and `mean` in the "inference mode", and both `gama`
    and `beta` will not be updated !
    """
    def call(self, x, training=False):
        if not training:
            training = tf.constant(False)
        training = tf.logical_and(training, self.trainable)
        return super().call(x, training)

def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True, activate_type='leaky'):
    if downsample:
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding,
                                  use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                                  bias_initializer=tf.constant_initializer(0.))(input_layer)

    if bn: conv = BatchNormalization()(conv)
    if activate == True:
        if activate_type == "leaky":
            conv = tf.nn.leaky_relu(conv, alpha=0.1)
        elif activate_type == "mish":
            conv = mish(conv)
    return conv

def mish(x):
    return x * tf.math.tanh(tf.math.softplus(x))
    # return tf.keras.layers.Lambda(lambda x: x*tf.tanh(tf.math.log(1+tf.exp(x))))(x)

def residual_block(input_layer, input_channel, filter_num1, filter_num2, activate_type='leaky'):
    short_cut = input_layer
    conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1), activate_type=activate_type)
    conv = convolutional(conv       , filters_shape=(3, 3, filter_num1,   filter_num2), activate_type=activate_type)

    residual_output = short_cut + conv
    return residual_output

# def block_tiny(input_layer, input_channel, filter_num1, activate_type='leaky'):
#     conv = convolutional(input_layer, filters_shape=(3, 3, input_channel, filter_num1), activate_type=activate_type)
#     short_cut = input_layer
#     conv = convolutional(conv, filters_shape=(3, 3, input_channel, filter_num1), activate_type=activate_type)
#
#     input_data = tf.concat([conv, short_cut], axis=-1)
#     return residual_output

def route_group(input_layer, groups, group_id):
    convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
    return convs[group_id]

def upsample(input_layer):
    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='bilinear')

In [5]:
def cspdarknet53(input_data):

    input_data = convolutional(input_data, (3, 3,  3,  32), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 32,  64), downsample=True, activate_type="mish")

    route = input_data
    route = convolutional(route, (1, 1, 64, 64), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
    for i in range(1):
        input_data = residual_block(input_data,  64,  32, 64, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")

    input_data = tf.concat([input_data, route], axis=-1)
    input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 128, 64), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
    for i in range(2):
        input_data = residual_block(input_data, 64,  64, 64, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 256, 128), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 256, 128), activate_type="mish")
    for i in range(8):
        input_data = residual_block(input_data, 128, 128, 128, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 512, 256), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 512, 256), activate_type="mish")
    for i in range(8):
        input_data = residual_block(input_data, 256, 256, 256, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
    route_2 = input_data
    input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 1024, 512), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 1024, 512), activate_type="mish")
    for i in range(4):
        input_data = residual_block(input_data, 512, 512, 512, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 1024, 1024), activate_type="mish")
    route_3 = input_data
    input_data = convolutional(input_data, (1, 1, 1024, 512))
    input_data = convolutional(input_data, (3, 3, 512, 1024))
    input_data = convolutional(input_data, (1, 1, 1024, 512))

    input_data = tf.concat([tf.nn.max_pool(input_data, ksize=13, padding='SAME', strides=1), tf.nn.max_pool(input_data, ksize=9, padding='SAME', strides=1)
                            , tf.nn.max_pool(input_data, ksize=5, padding='SAME', strides=1), input_data], axis=-1)
    input_data = convolutional(input_data, (1, 1, 2048, 512))
    input_data = convolutional(input_data, (3, 3, 512, 1024))
    input_data = convolutional(input_data, (1, 1, 1024, 512))

    return route_1, route_2, input_data, route_3

In [6]:
def YOLO(input_layer, NUM_CLASS):
    return YOLOv4(input_layer, NUM_CLASS)

def YOLOv4(input_layer, NUM_CLASS):
    route_1, route_2, conv, route_3 = cspdarknet53(input_layer)

    route = conv
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = upsample(conv)
    route_2 = convolutional(route_2, (1, 1, 512, 256))
    conv = tf.concat([route_2, conv], axis=-1)

    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))

    route_2 = conv
    conv = convolutional(conv, (1, 1, 256, 128))
    conv = upsample(conv)
    route_1 = convolutional(route_1, (1, 1, 256, 128))
    conv = tf.concat([route_1, conv], axis=-1)

    conv = convolutional(conv, (1, 1, 256, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))

    route_1 = conv
    conv = convolutional(conv, (3, 3, 128, 256))
    conv_sbbox = convolutional(conv, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(route_1, (3, 3, 128, 256), downsample=True)
    conv = tf.concat([conv, route_2], axis=-1)

    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))

    route_2 = conv
    conv = convolutional(conv, (3, 3, 256, 512))
    conv_mbbox = convolutional(conv, (1, 1, 512, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(route_2, (3, 3, 256, 512), downsample=True)
    conv = tf.concat([conv, route], axis=-1)

    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))

    conv = convolutional(conv, (3, 3, 512, 1024))
    conv_lbbox = convolutional(conv, (1, 1, 1024, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

In [7]:
def decode_train(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
    conv_output = tf.reshape(conv_output,
                             (tf.shape(conv_output)[0], output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS),
                                                                          axis=-1)

    xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size))
    xy_grid = tf.expand_dims(tf.stack(xy_grid, axis=-1), axis=2)  # [gx, gy, 1, 2]
    xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [tf.shape(conv_output)[0], 1, 1, 3, 1])

    xy_grid = tf.cast(xy_grid, tf.float32)

    pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * \
              STRIDES[i]
    pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)

    pred_conf = tf.sigmoid(conv_raw_conf)
    pred_prob = tf.sigmoid(conv_raw_prob)

    return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)

In [8]:
NUM_CLASS=8
input_layer = tf.keras.layers.Input([416, 416, 3])
feature_maps = YOLO(input_layer, NUM_CLASS)

bbox_tensors = []
for i, fm in enumerate(feature_maps):
    if i == 0:
        bbox_tensor = decode_train(fm, 416 // 8, NUM_CLASS, np.array([8, 16, 32]), (np.array([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401])).reshape(3, 3, 2), i, [1.2, 1.1, 1.05])
    elif i == 1:
        bbox_tensor = decode_train(fm, 416 // 16, NUM_CLASS, np.array([8, 16, 32]), (np.array([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401])).reshape(3, 3, 2), i, [1.2, 1.1, 1.05])
    else:
        bbox_tensor = decode_train(fm, 416 // 32, NUM_CLASS, np.array([8, 16, 32]), (np.array([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401])).reshape(3, 3, 2), i, [1.2, 1.1, 1.05])
    bbox_tensors.append(fm)
    bbox_tensors.append(bbox_tensor)

model = tf.keras.Model(input_layer, bbox_tensors)

In [9]:
# check model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 416, 416, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 416, 416, 32  128        ['conv2d[0][0]']                 
 alization)                     )                                                             

In [10]:
print(model.layers[203].output)

KerasTensor(type_spec=TensorSpec(shape=(None, 52, 52, 256), dtype=tf.float32, name=None), name='conv2d_37/Conv2D:0', description="created by layer 'conv2d_37'")


In [11]:
print(model.layers[318].output)

KerasTensor(type_spec=TensorSpec(shape=(None, 26, 26, 512), dtype=tf.float32, name=None), name='conv2d_58/Conv2D:0', description="created by layer 'conv2d_58'")


In [12]:
print(model.layers[389].output)

KerasTensor(type_spec=TensorSpec(shape=(None, 13, 13, 1024), dtype=tf.float32, name=None), name='conv2d_71/Conv2D:0', description="created by layer 'conv2d_71'")


In [13]:
# def load_weights(model, weights_file: str):
#     with open(weights_file, "rb") as fd:
#         # major, minor, revision, seen, _
#         _np_fromfile(fd, dtype=np.int32, count=5)

#         for layer in model.layers:
#             if "convolutional" in layer.name:
#                 if not yolo_conv2d_load_weights(layer, fd):
#                     break

#         if len(fd.read()) != 0:
#             raise ValueError("Model and weights file do not match.")


# def _np_fromfile(fd, dtype, count: int):
#     data = np.fromfile(fd, dtype=dtype, count=count)
#     if len(data) != count:
#         if len(data) == 0:
#             return None
#         raise ValueError("Model and weights file do not match.")
#     return data


# def yolo_conv2d_load_weights(yolo_conv2d, fd) -> bool:
#     conv2d = None
#     batch_normalization = None
#     for layer in yolo_conv2d.layers:
#         if "batch_normalization" in layer.name:
#             batch_normalization = layer
#         elif "conv2d" in layer.name:
#             conv2d = layer

#     filters = conv2d.filters

#     if batch_normalization is not None:
#         # darknet weights: [beta, gamma, mean, variance]
#         bn_weights = _np_fromfile(fd, dtype=np.float32, count=4 * filters)
#         if bn_weights is None:
#             return False
#         # tf weights: [gamma, beta, mean, variance]
#         bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]

#         batch_normalization.set_weights(bn_weights)

#     conv_bias = None
#     if conv2d.use_bias:
#         conv_bias = _np_fromfile(fd, dtype=np.float32, count=filters)
#         if conv_bias is None:
#             return False

#     # darknet shape (out_dim, in_dim, kernel_size, kernel_size)
#     conv_shape = (filters, conv2d.input_shape[-1], *conv2d.kernel_size)

#     conv_weights = _np_fromfile(
#         fd, dtype=np.float32, count=np.product(conv_shape)
#     )
#     if conv_weights is None:
#         return False
#     # tf shape (kernel_size, kernel_size, in_dim, out_dim)
#     conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

#     if conv_bias is None:
#         conv2d.set_weights([conv_weights])
#     else:
#         conv2d.set_weights([conv_weights, conv_bias])

#     return True

In [14]:
# def load_weights(model, weights_file, model_name='yolov4', is_tiny=False):
#     if is_tiny:
#         if model_name == 'yolov3':
#             layer_size = 13
#             output_pos = [9, 12]
#         else:
#             layer_size = 21
#             output_pos = [17, 20]
#     else:
#         if model_name == 'yolov3':
#             layer_size = 75
#             output_pos = [58, 66, 74]
#         else:
#             layer_size = 110
#             output_pos = [93, 101, 109]
#     wf = open(weights_file, 'rb')
#     major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)

#     j = 0
#     for i in range(layer_size):

#         # if i in output_pos:
#         #     continue
#         conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
#         bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'

#         conv_layer = model.get_layer(conv_layer_name)
#         filters = conv_layer.filters
#         k_size = conv_layer.kernel_size[0]
#         in_dim = conv_layer.input_shape[-1]
#         print(i)

#         if i not in output_pos:
#             # darknet weights: [beta, gamma, mean, variance]
#             bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
#             print(bn_weights)
#             # tf weights: [gamma, beta, mean, variance]
#             bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
#             bn_layer = model.get_layer(bn_layer_name)
#             j += 1
#         else:
#             # conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
#             conv_bias = np.zeros(filters)
#             print(conv_bias)
#             if i==93:
#               a = np.fromfile(wf, dtype=np.float32, count=80)
#             elif i==101:
#               a = np.fromfile(wf, dtype=np.float32, count=80)
#             elif i==109:
#               a = np.fromfile(wf, dtype=np.float32, count=80)

#         # darknet shape (out_dim, in_dim, height, width)
#         if i not in output_pos:
#           conv_shape = (filters, in_dim, k_size, k_size)
#           conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
#           conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
#         # tf shape (height, width, in_dim, out_dim)
#         else:
#           # conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
#           conv_weights = np.zeros(filters, in_dim, k_size, k_size).transpose([2, 3, 1, 0])
#         if i==93:
#           a = np.fromfile(wf, dtype=np.float32, count=80*256)
#         elif i==101:
#           a = np.fromfile(wf, dtype=np.float32, count=80*512)
#         elif i==109:
#           a = np.fromfile(wf, dtype=np.float32, count=80*1024)

#         if i not in output_pos:
#             conv_layer.set_weights([conv_weights])
#             bn_layer.set_weights(bn_weights)
#         else:
#             conv_layer.set_weights([conv_weights, conv_bias])

#     # assert len(wf.read()) == 0, 'failed to read all data'
#     wf.close()

In [15]:
def load_weights(model, weights_file, model_name='yolov4', is_tiny=False):
    if is_tiny:
        if model_name == 'yolov3':
            layer_size = 13
            output_pos = [9, 12]
        else:
            layer_size = 21
            output_pos = [17, 20]
    else:
        if model_name == 'yolov3':
            layer_size = 75
            output_pos = [58, 66, 74]
        else:
            layer_size = 110
            output_pos = [93, 101, 109]
    wf = open(weights_file, 'rb')
    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)

    j = 0
    for i in range(layer_size):

        # if i in output_pos:
        #     continue
        conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
        bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'

        conv_layer = model.get_layer(conv_layer_name)
        filters = conv_layer.filters
        k_size = conv_layer.kernel_size[0]
        in_dim = conv_layer.input_shape[-1]
        print(i)

        if i not in output_pos:
            # darknet weights: [beta, gamma, mean, variance]
            # bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
            # print(bn_weights)
            # tf weights: [gamma, beta, mean, variance]
            bn_weights = np.random.randn(4, filters)[[1, 0, 2, 3]]
            bn_layer = model.get_layer(bn_layer_name)
            j += 1
        else:
            # conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
            conv_bias = np.random.randn(filters)
            print(conv_bias)

        # darknet shape (out_dim, in_dim, height, width)
        # conv_shape = (filters, in_dim, k_size, k_size)
        # conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
        # # tf shape (height, width, in_dim, out_dim)
        # conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
        conv_weights = np.random.randn(filters, in_dim, k_size, k_size).transpose([2, 3, 1, 0])
        if i not in output_pos:
            conv_layer.set_weights([conv_weights])
            bn_layer.set_weights(bn_weights)
        else:
            conv_layer.set_weights([conv_weights, conv_bias])

    # assert len(wf.read()) == 0, 'failed to read all data'
    wf.close()

In [20]:
load_weights(model, 'yolov4.weights', 'yolov4', False)
# load_weights(model, 'yolov4.weights')


0
[ 6.75765228e+00  1.45234299e+00  1.43797445e+00 -6.22183084e+00
 -8.13172817e+00 -8.58375263e+00  1.16502881e+00  1.30729365e+00
  3.12468767e-01  1.54736042e+00  5.48846102e+00  1.65196764e+00
  3.85830545e+00  1.51468492e+00 -2.95105743e+00  1.87607110e-01
  7.59630775e+00  4.58368683e+00  3.10282612e+00  1.10981607e+00
 -1.50270319e+00  2.12788558e+00  5.57506609e+00  1.71180284e+00
 -4.19304483e-02  2.88663059e-01  2.33444238e+00  4.94528913e+00
  2.49182034e+00  3.71791172e+00  9.30891752e-01 -1.07949219e+01
  2.74113560e+00  2.81690764e+00  1.68871284e+00  6.67218876e+00
  5.08300352e+00  7.73575354e+00  9.83997434e-02  2.74505913e-01
  1.70594239e+00  3.10940480e+00  2.20089483e+00  2.98433065e+00
  2.66568279e+00  2.78101158e+00  2.99569750e+00  7.25228369e-01
  2.32301164e+00  2.19861627e+00  2.62620139e+00  3.38603401e+00
  2.02679205e+00  1.93276525e+00  2.31219363e+00  2.06490159e+00
  5.61885178e-01  2.97663593e+00  1.92588532e+00  1.48809826e+00
  3.00735617e+00  8.474

In [21]:
print(model.get_weights())

[array([[[[-4.95470554e-01,  1.86482286e+00,  1.32324263e-01,
           4.07818228e-01, -9.66788009e-02, -1.57632038e-01,
          -8.85213551e-04, -1.59049518e-02,  8.77859369e-02,
           1.92936254e+00, -1.80069685e+00, -1.67709112e+00,
           2.40181997e-01, -1.68064868e+00, -4.20151576e-02,
          -5.92249967e-02, -1.94882274e+00,  2.08459330e+00,
          -1.58136025e-01,  4.82737184e-01, -1.68163821e-01,
          -6.13700211e-01, -2.05895972e+00,  1.39093995e-01,
          -1.22976862e-01, -4.19946939e-01, -2.26263627e-01,
          -5.55333674e-01, -3.32594782e-01,  2.78454512e-01,
           2.86972761e-01, -3.56918983e-02],
         [-1.27369955e-01,  1.74926281e+00, -1.45710304e-01,
           3.10536146e-01, -1.68050781e-01,  3.15157205e-01,
          -4.50293981e-02, -9.99845937e-02,  3.15605700e-02,
           1.90216959e+00,  7.55281568e-01, -1.37390924e+00,
           2.56817698e-01, -1.57319248e+00, -5.04742786e-02,
          -1.40047401e-01,  7.82366037e

In [22]:
# Save the model in TF format
! rm yolov4.h5

model.save('yolov4.h5')
! dir *.h5

dayolo.h5  yolov4.h5


In [23]:
# Verifiy the saved model using keras
# Load the model
from keras.models import load_model, Model
yolo_model = load_model("yolov4.h5")



In [24]:
from tensorflow.python.framework import ops

class FlipGradientBuilder(object):
    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=0.1):
        grad_name = "FlipGradient%d" % self.num_calls
        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]
        
        g = tf.compat.v1.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)
            
        self.num_calls += 1
        return y
    
flip_gradient = FlipGradientBuilder()

In [25]:
from keras.layers import Dense, Flatten


model = yolo_model
feature_ext1 = model.layers[203].output
feature_ext2 = model.layers[318].output
feature_ext3 = model.layers[389].output
yhat = model.outputs
reverse_features1 = flip_gradient(feature_ext1, 0.1)
reverse_features2 = flip_gradient(feature_ext2, 0.1)
reverse_features3 = flip_gradient(feature_ext3, 0.1)


x=ZeroPadding2D(padding=(1, 1), name= 'padd_0_1')(reverse_features1)
layer_0_0=x
x=BatchNormalization(name='norm_0')(x)
layer_0_1=x
x=Conv2D(128, 1, strides=(1,1), padding='valid', name='convo_0_1', activation= None)(x)
layer_0_2=x
x = LeakyReLU(alpha=00.1, name='leaky_rel_0')(x)
layer_0_3=x
x=ZeroPadding2D(padding=(1, 1), name= 'padd_0_2')(x)
layer_0_4=x
x=Conv2D(1, 1, strides=(1,1), padding='valid', name='convo_0_2', activation= None)(x)
layer_0_5=x
x=Flatten(name='flatten_0')(x)
layer_0_6=x
x=Dense(100, activation='relu', name='linear_0_1')(x)
layer_0_7=x
x=Dense(1, activation='sigmoid', name='linear_0_2')(x)
layer_0_8=x
domain_pred1=x


x=ZeroPadding2D(padding=(1, 1), name= 'padd_1_1')(reverse_features2)
layer_1_0=x
x=BatchNormalization(name='norm_1')(x)
layer_1_1=x
x=Conv2D(256, 1, strides=(1,1), padding='valid', name='convo_1_1', activation= None)(x)
layer_1_2=x
x = LeakyReLU(alpha=00.1, name='leaky_rel_1')(x)
layer_1_3=x
x=ZeroPadding2D(padding=(1, 1), name= 'padd_1_2')(x)
layer_1_4=x
x=Conv2D(1, 1, strides=(1,1), padding='valid', name='convo_1_2', activation= None)(x)
layer_1_5=x
x=Flatten(name='flatten_1')(x)
layer_1_6=x
x=Dense(100, activation='relu', name='linear_1_1')(x)
layer_1_7=x
x=Dense(1, activation='sigmoid', name='linear_1_2')(x)
layer_1_8=x
domain_pred2=x


x=ZeroPadding2D(padding=(1, 1), name= 'padd_2_1')(reverse_features3)
layer_2_0=x
x=BatchNormalization(name='norm_2')(x)
layer_2_1=x
x=Conv2D(512, 1, strides=(1,1), padding='valid', name='convo_2_1', activation= None)(x)
layer_2_2=x
x = LeakyReLU(alpha=00.1, name='leaky_rel_2')(x)
layer_2_3=x
x=ZeroPadding2D(padding=(1, 1), name= 'padd_2_2')(x)
layer_2_4=x
x=Conv2D(1, 1, strides=(1,1), padding='valid', name='convo_2_2', activation= None)(x)
layer_2_5=x
x=Flatten(name='flatten_2')(x)
layer_2_6=x
x=Dense(100, activation='relu', name='linear_2_1')(x)
layer_2_7=x
x=Dense(1, activation='sigmoid', name='linear_2_2')(x)
layer_2_8=x
domain_pred3=x

bbox_tensors = []                          
for i in yhat:
  bbox_tensors.append(i)

bbox_tensors.append(domain_pred1)
bbox_tensors.append(domain_pred2)
bbox_tensors.append(domain_pred3)



model=Model(model.inputs, bbox_tensors, name='DAYOLO')








In [26]:
model.summary()

Model: "DAYOLO"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 416, 416, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 416, 416, 32  128        ['conv2d[0][0]']                 
 alization)                     )                                                            

In [27]:
for l in model.layers:
    print(l.name, l.trainable)  #setting all neck and backbone layers to be non-trainable

input_1 True
conv2d True
batch_normalization True
tf.math.softplus True
tf.math.tanh True
tf.math.multiply True
zero_padding2d True
conv2d_1 True
batch_normalization_1 True
tf.math.softplus_1 True
tf.math.tanh_1 True
tf.math.multiply_1 True
conv2d_3 True
batch_normalization_3 True
tf.math.softplus_3 True
tf.math.tanh_3 True
tf.math.multiply_3 True
conv2d_4 True
batch_normalization_4 True
tf.math.softplus_4 True
tf.math.tanh_4 True
tf.math.multiply_4 True
conv2d_5 True
batch_normalization_5 True
tf.math.softplus_5 True
tf.math.tanh_5 True
tf.math.multiply_5 True
tf.__operators__.add True
conv2d_6 True
conv2d_2 True
batch_normalization_6 True
batch_normalization_2 True
tf.math.softplus_6 True
tf.math.softplus_2 True
tf.math.tanh_6 True
tf.math.tanh_2 True
tf.math.multiply_6 True
tf.math.multiply_2 True
tf.concat True
conv2d_7 True
batch_normalization_7 True
tf.math.softplus_7 True
tf.math.tanh_7 True
tf.math.multiply_7 True
zero_padding2d_1 True
conv2d_8 True
batch_normalization_8 True
t

In [30]:
from numpy import expand_dims
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from IPython.display import display, Image
from IPython.display import display
from PIL import Image

# load and prepare an image
def load_image_pixels(filename, shape):
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, interpolation = 'bilinear', target_size=shape)
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0

    # add a dimension so that we have one sample
    image = expand_dims(image, 0)
    
    return image, width, height

# Pre-process the image
input_w, input_h = 416, 416
photo_filename = 'aachen_000005_000019_leftImg8bit.png'
image, image_w, image_h = load_image_pixels(photo_filename, (input_w, input_h))
print("image initial size: ", image_w, image_h)
print("input image",image.shape)

yhat = model.predict(image)
print("output:",[a.shape for a in yhat])
yhat[5]

image initial size:  2048 1024
input image (1, 416, 416, 3)
output: [(1, 52, 52, 39), (1, 52, 52, 3, 13), (1, 26, 26, 39), (1, 26, 26, 3, 13), (1, 13, 13, 39), (1, 13, 13, 3, 13), (1, 1), (1, 1), (1, 1)]


array([[[[[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         ...,

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]]],


        [[[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., 

In [None]:
!unzip '/content/drive/MyDrive/data.zip' -d '/content'

In [None]:
path='/content'
os.chdir(path)

In [None]:
import cv2
import random
import colorsys
import numpy as np
import tensorflow as tf
# from core.config import cfg

def load_freeze_layer(model='yolov4', tiny=False):
    if tiny:
        if model == 'yolov3':
            freeze_layouts = ['conv2d_9', 'conv2d_12']
        else:
            freeze_layouts = ['conv2d_17', 'conv2d_20']
    else:
        if model == 'yolov3':
            freeze_layouts = ['conv2d_58', 'conv2d_66', 'conv2d_74']
        else:
            freeze_layouts = ['conv2d_93', 'conv2d_101', 'conv2d_109']
    return freeze_layouts


def read_class_names(class_file_name):
    names = {}
    with open(class_file_name, 'r') as data:
        for ID, name in enumerate(data):
            names[ID] = name.strip('\n')
    return names

def load_config(FLAGS):
    if FLAGS.tiny:
        STRIDES = np.array([16, 32])
        ANCHORS = get_anchors([23,27, 37,58, 81,82, 81,82, 135,169, 344,319], FLAGS.tiny)
        XYSCALE = [1.05, 1.05] if FLAGS.model == 'yolov4' else [1, 1]
    else:
        STRIDES = np.array([8, 16, 32])
        if FLAGS.model == 'yolov4':
            ANCHORS = get_anchors([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401], FLAGS.tiny)
        elif FLAGS.model == 'yolov3':
            ANCHORS = get_anchors([10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326], FLAGS.tiny)
        XYSCALE = [1.2, 1.1, 1.05] if FLAGS.model == 'yolov4' else [1, 1, 1]
    NUM_CLASS = 8                                                                               #num_classes

    return STRIDES, ANCHORS, NUM_CLASS, XYSCALE

def get_anchors(anchors_path, tiny=False):
    anchors = np.array(anchors_path)
    if tiny:
        return anchors.reshape(2, 3, 2)
    else:
        return anchors.reshape(3, 3, 2)

def image_preprocess(image, target_size, gt_boxes=None):

    ih, iw    = target_size
    h,  w, _  = image.shape

    scale = min(iw/w, ih/h)
    nw, nh  = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))

    image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
    dw, dh = (iw - nw) // 2, (ih-nh) // 2
    image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
    image_paded = image_paded / 255.

    if gt_boxes is None:
        return image_paded

    else:
        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
        return image_paded, gt_boxes

# def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()), show_label=True):
#     num_classes = len(classes)
#     image_h, image_w, _ = image.shape
#     hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
#     colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
#     colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

#     random.seed(0)
#     random.shuffle(colors)
#     random.seed(None)

#     out_boxes, out_scores, out_classes, num_boxes = bboxes
#     for i in range(num_boxes[0]):
#         if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes: continue
#         coor = out_boxes[0][i]
#         coor[0] = int(coor[0] * image_h)
#         coor[2] = int(coor[2] * image_h)
#         coor[1] = int(coor[1] * image_w)
#         coor[3] = int(coor[3] * image_w)

#         fontScale = 0.5
#         score = out_scores[0][i]
#         class_ind = int(out_classes[0][i])
#         class_name = classes[class_ind]

#         # check if class is in allowed classes
#         if class_name not in allowed_classes:
#             continue
#         else:
#             bbox_color = colors[class_ind]
#             bbox_thick = int(0.6 * (image_h + image_w) / 600)
#             c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
#             cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

#             if show_label:
#                 bbox_mess = '%s: %.2f' % (classes[class_ind], score)
#                 t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
#                 c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
#                 cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled

#                 cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
#                             fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
#     return image

def bbox_iou(bboxes1, bboxes2):
    """
    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    bboxes1_area = bboxes1[..., 2] * bboxes1[..., 3]
    bboxes2_area = bboxes2[..., 2] * bboxes2[..., 3]

    bboxes1_coor = tf.concat(
        [
            bboxes1[..., :2] - bboxes1[..., 2:] * 0.5,
            bboxes1[..., :2] + bboxes1[..., 2:] * 0.5,
        ],
        axis=-1,
    )
    bboxes2_coor = tf.concat(
        [
            bboxes2[..., :2] - bboxes2[..., 2:] * 0.5,
            bboxes2[..., :2] + bboxes2[..., 2:] * 0.5,
        ],
        axis=-1,
    )

    left_up = tf.maximum(bboxes1_coor[..., :2], bboxes2_coor[..., :2])
    right_down = tf.minimum(bboxes1_coor[..., 2:], bboxes2_coor[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = bboxes1_area + bboxes2_area - inter_area

    iou = tf.math.divide_no_nan(inter_area, union_area)

    return iou


def bbox_giou(bboxes1, bboxes2):
    """
    Generalized IoU
    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    bboxes1_area = bboxes1[..., 2] * bboxes1[..., 3]
    bboxes2_area = bboxes2[..., 2] * bboxes2[..., 3]

    bboxes1_coor = tf.concat(
        [
            bboxes1[..., :2] - bboxes1[..., 2:] * 0.5,
            bboxes1[..., :2] + bboxes1[..., 2:] * 0.5,
        ],
        axis=-1,
    )
    bboxes2_coor = tf.concat(
        [
            bboxes2[..., :2] - bboxes2[..., 2:] * 0.5,
            bboxes2[..., :2] + bboxes2[..., 2:] * 0.5,
        ],
        axis=-1,
    )

    left_up = tf.maximum(bboxes1_coor[..., :2], bboxes2_coor[..., :2])
    right_down = tf.minimum(bboxes1_coor[..., 2:], bboxes2_coor[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = bboxes1_area + bboxes2_area - inter_area

    iou = tf.math.divide_no_nan(inter_area, union_area)

    enclose_left_up = tf.minimum(bboxes1_coor[..., :2], bboxes2_coor[..., :2])
    enclose_right_down = tf.maximum(
        bboxes1_coor[..., 2:], bboxes2_coor[..., 2:]
    )

    enclose_section = enclose_right_down - enclose_left_up
    enclose_area = enclose_section[..., 0] * enclose_section[..., 1]

    giou = iou - tf.math.divide_no_nan(enclose_area - union_area, enclose_area)

    return giou


def bbox_ciou(bboxes1, bboxes2):
    """
    Complete IoU
    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    bboxes1_area = bboxes1[..., 2] * bboxes1[..., 3]
    bboxes2_area = bboxes2[..., 2] * bboxes2[..., 3]

    bboxes1_coor = tf.concat(
        [
            bboxes1[..., :2] - bboxes1[..., 2:] * 0.5,
            bboxes1[..., :2] + bboxes1[..., 2:] * 0.5,
        ],
        axis=-1,
    )
    bboxes2_coor = tf.concat(
        [
            bboxes2[..., :2] - bboxes2[..., 2:] * 0.5,
            bboxes2[..., :2] + bboxes2[..., 2:] * 0.5,
        ],
        axis=-1,
    )

    left_up = tf.maximum(bboxes1_coor[..., :2], bboxes2_coor[..., :2])
    right_down = tf.minimum(bboxes1_coor[..., 2:], bboxes2_coor[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = bboxes1_area + bboxes2_area - inter_area

    iou = tf.math.divide_no_nan(inter_area, union_area)

    enclose_left_up = tf.minimum(bboxes1_coor[..., :2], bboxes2_coor[..., :2])
    enclose_right_down = tf.maximum(
        bboxes1_coor[..., 2:], bboxes2_coor[..., 2:]
    )

    enclose_section = enclose_right_down - enclose_left_up

    c_2 = enclose_section[..., 0] ** 2 + enclose_section[..., 1] ** 2

    center_diagonal = bboxes2[..., :2] - bboxes1[..., :2]

    rho_2 = center_diagonal[..., 0] ** 2 + center_diagonal[..., 1] ** 2

    diou = iou - tf.math.divide_no_nan(rho_2, c_2)

    v = (
        (
            tf.math.atan(
                tf.math.divide_no_nan(bboxes1[..., 2], bboxes1[..., 3])
            )
            - tf.math.atan(
                tf.math.divide_no_nan(bboxes2[..., 2], bboxes2[..., 3])
            )
        )
        * 2
        / np.pi
    ) ** 2

    alpha = tf.math.divide_no_nan(v, 1 - iou + v)

    ciou = diou - alpha * v

    return ciou

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    classes_in_img = list(set(bboxes[:, 5]))
    best_bboxes = []

    for cls in classes_in_img:
        cls_mask = (bboxes[:, 5] == cls)
        cls_bboxes = bboxes[cls_mask]

        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
            iou = bbox_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
            weight = np.ones((len(iou),), dtype=np.float32)

            assert method in ['nms', 'soft-nms']

            if method == 'nms':
                iou_mask = iou > iou_threshold
                weight[iou_mask] = 0.0

            if method == 'soft-nms':
                weight = np.exp(-(1.0 * iou ** 2 / sigma))

            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            score_mask = cls_bboxes[:, 4] > 0.
            cls_bboxes = cls_bboxes[score_mask]

    return best_bboxes

def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for l in model.layers:
            freeze_all(l, frozen)
def unfreeze_all(model, frozen=False):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for l in model.layers:
            unfreeze_all(l, frozen)

In [None]:
import os
import cv2
import random
import numpy as np


class Dataset(object):
    """implement Dataset here"""

    def __init__(self, is_training: bool, dataset_type: str = "yolo"):                             #num_classes
        self.tiny = False
        self.strides, self.anchors, NUM_CLASS, XYSCALE = np.array([8, 16, 32]), (np.array([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401])).reshape(3, 3, 2), 8, [1.2, 1.1, 1.05]
        self.dataset_type = dataset_type

        self.annot_path = (
            "./data/source.txt" if is_training else "./data/test_target.txt" #
        )
        self.input_sizes = (
            416 if is_training else 416
        )
        self.batch_size = (
            2 if is_training else 2
        )
        self.data_aug = True if is_training else False

        self.train_input_sizes = 416
        # self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = 8                                                           #
        self.anchor_per_scale = 3
        self.max_bbox_per_scale = 150
        self.is_training = is_training
        self.annotations = self.load_annotations()
        if is_training:
          self.annotations2 = self.load_annotations2()
        self.num_samples = len(self.annotations)
        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
        self.batch_count = 0

    def load_annotations(self):
        with open(self.annot_path, "r") as f:
            txt = f.readlines()
            if self.dataset_type == "converted_coco":
                annotations = [
                    line.strip()
                    for line in txt
                    if len(line.strip().split()[1:]) != 0
                ]
            elif self.dataset_type == "yolo":
                annotations = []
                for line in txt:
                    image_path = line.strip()
                    root, _ = os.path.splitext(image_path)
                    with open(root + ".txt") as fd:
                        boxes = fd.readlines()
                        string = ""
                        for box in boxes:
                            box = box.strip()
                            box = box.split()
                            class_num = int(box[0])
                            center_x = float(box[1])
                            center_y = float(box[2])
                            half_width = float(box[3]) / 2
                            half_height = float(box[4]) / 2
                            string += " {},{},{},{},{}".format(
                                center_x - half_width,
                                center_y - half_height,
                                center_x + half_width,
                                center_y + half_height,
                                class_num,
                            )
                        annotations.append(image_path + string)

        np.random.shuffle(annotations)
        return annotations

    def load_annotations2(self):
        with open("./data/target.txt", "r") as f:                       #
            txt = f.readlines()
            if self.dataset_type == "converted_coco":
                annotations2 = [
                    line.strip()
                    for line in txt
                    if len(line.strip().split()[1:]) != 0
                ]
            elif self.dataset_type == "yolo":
                annotations2 = []
                for line in txt:
                    image_path = line.strip()
                    annotations2.append(image_path)

        np.random.shuffle(annotations2)
        return annotations2  

    def __iter__(self):
        return self

    def __next__(self):
        with tf.device("/cpu:0"):
            # self.train_input_size = random.choice(self.train_input_sizes)
            self.train_input_size = 416
            self.train_output_sizes = self.train_input_size // self.strides

            
            batch_image = np.zeros(
                (
                    self.batch_size,
                    self.train_input_size,
                    self.train_input_size,
                    3,
                ),
                dtype=np.float32,
            )
            batch_image2 = np.zeros(
                (
                    self.batch_size,
                    self.train_input_size,
                    self.train_input_size,
                    3,
                ),
                dtype=np.float32,
            )

            batch_label_sbbox = np.zeros(
                (
                    self.batch_size,
                    self.train_output_sizes[0],
                    self.train_output_sizes[0],
                    self.anchor_per_scale,
                    5 + self.num_classes,
                ),
                dtype=np.float32,
            )
            batch_label_mbbox = np.zeros(
                (
                    self.batch_size,
                    self.train_output_sizes[1],
                    self.train_output_sizes[1],
                    self.anchor_per_scale,
                    5 + self.num_classes,
                ),
                dtype=np.float32,
            )
            batch_label_lbbox = np.zeros(
                (
                    self.batch_size,
                    self.train_output_sizes[2],
                    self.train_output_sizes[2],
                    self.anchor_per_scale,
                    5 + self.num_classes,
                ),
                dtype=np.float32,
            )

            batch_sbboxes = np.zeros(
                (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32
            )
            batch_mbboxes = np.zeros(
                (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32
            )
            batch_lbboxes = np.zeros(
                (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32
            )

            num = 0
            if self.batch_count < self.num_batchs:
                while num < self.batch_size:
                    index = self.batch_count * self.batch_size + num
                    if index >= self.num_samples:
                        index -= self.num_samples
                    annotation = self.annotations[index]
                    image, bboxes = self.parse_annotation(annotation)
                    if self.is_training:
                      annotation2 = self.annotations2[index]#
                      image2 = self.parse_annotation2(annotation2)#
                      batch_image2[num, :, :, :] = image2
                    
                    (
                        label_sbbox,
                        label_mbbox,
                        label_lbbox,
                        sbboxes,
                        mbboxes,
                        lbboxes,
                    ) = self.preprocess_true_boxes(bboxes)

                    batch_image[num, :, :, :] = image
                    batch_label_sbbox[num, :, :, :, :] = label_sbbox
                    batch_label_mbbox[num, :, :, :, :] = label_mbbox
                    batch_label_lbbox[num, :, :, :, :] = label_lbbox
                    batch_sbboxes[num, :, :] = sbboxes
                    batch_mbboxes[num, :, :] = mbboxes
                    batch_lbboxes[num, :, :] = lbboxes
                    num += 1
                self.batch_count += 1
                if self.is_training:
                  batch_image = batch_image, batch_image2
                batch_smaller_target = batch_label_sbbox, batch_sbboxes
                batch_medium_target = batch_label_mbbox, batch_mbboxes
                batch_larger_target = batch_label_lbbox, batch_lbboxes

                return (
                    batch_image,
                    (
                        batch_smaller_target,
                        batch_medium_target,
                        batch_larger_target,
                    ),
                )
            else:
                self.batch_count = 0
                np.random.shuffle(self.annotations)
                if self.is_training:
                  np.random.shuffle(self.annotations2)
                raise StopIteration

    def random_horizontal_flip(self, image, bboxes = None):
        if random.random() < 0.5:
            _, w, _ = image.shape
            image = image[:, ::-1, :]
            if bboxes is not None:
              bboxes[:, [0, 2]] = w - bboxes[:, [2, 0]]

        if bboxes is None:
          return image

        return image, bboxes

    def random_crop(self, image, bboxes):
        if random.random() < 0.5:
            h, w, _ = image.shape
            max_bbox = np.concatenate(
                [
                    np.min(bboxes[:, 0:2], axis=0),
                    np.max(bboxes[:, 2:4], axis=0),
                ],
                axis=-1,
            )

            max_l_trans = max_bbox[0]
            max_u_trans = max_bbox[1]
            max_r_trans = w - max_bbox[2]
            max_d_trans = h - max_bbox[3]

            crop_xmin = max(
                0, int(max_bbox[0] - random.uniform(0, max_l_trans))
            )
            crop_ymin = max(
                0, int(max_bbox[1] - random.uniform(0, max_u_trans))
            )
            crop_xmax = max(
                w, int(max_bbox[2] + random.uniform(0, max_r_trans))
            )
            crop_ymax = max(
                h, int(max_bbox[3] + random.uniform(0, max_d_trans))
            )

            image = image[crop_ymin:crop_ymax, crop_xmin:crop_xmax]

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin

        return image, bboxes

    def random_translate(self, image, bboxes):
        if random.random() < 0.5:
            h, w, _ = image.shape
            max_bbox = np.concatenate(
                [
                    np.min(bboxes[:, 0:2], axis=0),
                    np.max(bboxes[:, 2:4], axis=0),
                ],
                axis=-1,
            )

            max_l_trans = max_bbox[0]
            max_u_trans = max_bbox[1]
            max_r_trans = w - max_bbox[2]
            max_d_trans = h - max_bbox[3]

            tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
            ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))

            M = np.array([[1, 0, tx], [0, 1, ty]])
            image = cv2.warpAffine(image, M, (w, h))

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty

        return image, bboxes

    def parse_annotation(self, annotation):
        line = annotation.split()
        image_path = line[0]
        if not os.path.exists(image_path):
            raise KeyError("%s does not exist ... " % image_path)
        image = cv2.imread(image_path)
        if self.dataset_type == "converted_coco":
            bboxes = np.array(
                [list(map(int, box.split(","))) for box in line[1:]]
            )
        elif self.dataset_type == "yolo":
            height, width, _ = image.shape
            bboxes = np.array(
                [list(map(float, box.split(","))) for box in line[1:]]
            )
            bboxes = bboxes * np.array([width, height, width, height, 1])
            bboxes = bboxes.astype(np.int64)

        if self.data_aug:
            image, bboxes = self.random_horizontal_flip(
                np.copy(image), np.copy(bboxes)
            )
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(
                np.copy(image), np.copy(bboxes)
            )

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image, bboxes = image_preprocess(
            np.copy(image),
            [self.train_input_size, self.train_input_size],
            np.copy(bboxes),
        )
        return image, bboxes

    def parse_annotation2(self, annotation):
        image_path = annotation ##
        if not os.path.exists(image_path):
            raise KeyError("%s does not exist ... " % image_path)
        image = cv2.imread(image_path)
        # if self.dataset_type == "converted_coco":
        #     bboxes = np.array(
        #         [list(map(int, box.split(","))) for box in line[1:]]
        #     )
        if self.dataset_type == "yolo": #
            height, width, _ = image.shape

        if self.data_aug:
            image = self.random_horizontal_flip(np.copy(image))
            # image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))        ---->what to do .... unable to reproduce rand crop and rand transl without bounding boxes????
            # image, bboxes = self.random_translate(
            #     np.copy(image), np.copy(bboxes)
            # )

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image_preprocess(
            np.copy(image),
            [self.train_input_size, self.train_input_size]
        )
        return image

    def preprocess_true_boxes(self, bboxes):
        label = [
            np.zeros(
                (
                    self.train_output_sizes[i],
                    self.train_output_sizes[i],
                    self.anchor_per_scale,
                    5 + self.num_classes,
                )
            )
            for i in range(3)
        ]
        bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)]
        bbox_count = np.zeros((3,))

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            bbox_class_ind = bbox[4]

            onehot = np.zeros(self.num_classes, dtype=np.float)
            onehot[bbox_class_ind] = 1.0
            uniform_distribution = np.full(
                self.num_classes, 1.0 / self.num_classes
            )
            deta = 0.01
            smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution

            bbox_xywh = np.concatenate(
                [
                    (bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                    bbox_coor[2:] - bbox_coor[:2],
                ],
                axis=-1,
            )
            bbox_xywh_scaled = (
                1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
            )

            iou = []
            exist_positive = False
            for i in range(3):
                anchors_xywh = np.zeros((self.anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = (
                    np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                )
                anchors_xywh[:, 2:4] = self.anchors[i]

                iou_scale = bbox_iou(
                    bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh
                )
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(
                        np.int32
                    )

                    label[i][yind, xind, iou_mask, :] = 0
                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
                    label[i][yind, xind, iou_mask, 4:5] = 1.0
                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot

                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                    bbox_count[i] += 1

                    exist_positive = True

            if not exist_positive:
                best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
                best_detect = int(best_anchor_ind / self.anchor_per_scale)
                best_anchor = int(best_anchor_ind % self.anchor_per_scale)
                xind, yind = np.floor(
                    bbox_xywh_scaled[best_detect, 0:2]
                ).astype(np.int32)

                label[best_detect][yind, xind, best_anchor, :] = 0
                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot

                bbox_ind = int(
                    bbox_count[best_detect] % self.max_bbox_per_scale
                )
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                bbox_count[best_detect] += 1
        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh
        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes

    def __len__(self):
        return self.num_batchs

In [None]:
def compute_loss(pred, conv, label, bboxes, STRIDES, NUM_CLASS, IOU_LOSS_THRESH, i=0):
    conv_shape  = tf.shape(conv)
    batch_size  = conv_shape[0]
    output_size = conv_shape[1]
    input_size  = STRIDES[i] * output_size
    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh     = pred[:, :, :, :, 0:4]
    pred_conf     = pred[:, :, :, :, 4:5]

    label_xywh    = label[:, :, :, :, 0:4]
    respond_bbox  = label[:, :, :, :, 4:5]
    label_prob    = label[:, :, :, :, 5:]

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    input_size = tf.cast(input_size, tf.float32)

    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1- giou)

    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 )

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    conf_loss = conf_focal * (
            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
            +
            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
    )

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))

    return giou_loss, conf_loss, prob_loss

In [None]:
from absl import app, flags, logging
import numpy as np
import shutil

# flags.DEFINE_string('model', 'yolov4', 'yolov4, yolov3')
# # flags.DEFINE_string('weights', './scripts/yolov4.weights', 'pretrained weights')
# flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')


trainset = Dataset(is_training=True)
testset = Dataset(is_training=False)
logdir = "./data/log"
isfreeze = False
steps_per_epoch = len(trainset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = 2 * steps_per_epoch
total_steps = (50) * steps_per_epoch
# train_steps = (first_stage_epochs + second_stage_epochs) * steps_per_period

STRIDES, ANCHORS, NUM_CLASS, XYSCALE = np.array([8, 16, 32]), (np.array([12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401])).reshape(3, 3, 2), 8, [1.2, 1.1, 1.05]
IOU_LOSS_THRESH = 0.5

optimizer = tf.keras.optimizers.Adam()

if os.path.exists(logdir): shutil.rmtree(logdir)
writer = tf.summary.create_file_writer(logdir)

freeze_layers = load_freeze_layer('yolov4', False)

def train_step(image_data, target):
    with tf.GradientTape() as tape:
        print(image_data[0].shape)
        print(image_data[:][0].shape)
        pred_result = model(image_data[0], training=True)
        giou_loss = conf_loss = prob_loss = 0
        # pred_result[6], pred_result[7], pred_result[8] are domain predictions for source dataset
        pred_result2 = model(image_data[1], training=True)
        # ignore pred_result[0:6], pred_result[6], pred_result[7], pred_result[8] are domain predictions for target dataset
        for i in range(6, 9):
          pred_result[i] = pred_result[i].numpy()
          pred_result[i] = np.transpose(pred_result[i])
          pred_result2[i] = pred_result2[i].numpy()
          pred_result2[i] = np.transpose(pred_result2[i])

        # optimizing process
        for i in range(len(freeze_layers)):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        source_yolo_loss = giou_loss + conf_loss + prob_loss

        y_true_1 = [[1, 1], [0, 0]]                                     # 1 for source 0 for target
        y_pred_1 = [[pred_result[6][0][0],pred_result[6][0][1]], [pred_result2[6][0][0],pred_result2[6][0][1]]]
        y_true_2 = [[1, 1], [0, 0]]
        y_pred_2 = [[pred_result[7][0][0],pred_result[7][0][1]], [pred_result2[7][0][0],pred_result2[7][0][1]]]
        y_true_3 = [[1, 1], [0, 0]]
        y_pred_3 = [[pred_result[8][0][0],pred_result[8][0][1]], [pred_result2[8][0][0],pred_result2[8][0][1]]]



        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        
        domain_loss=0

        domain_loss += bce(y_true_1, y_pred_1).numpy()
        domain_loss += bce(y_true_2, y_pred_2).numpy()
        domain_loss += bce(y_true_3, y_pred_3).numpy()

        total_loss = domain_loss + source_yolo_loss

        

        gradients = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        tf.print("=> STEP %4d/%4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
                  "prob_loss: %4.2f  domain_loss: %4.2f  total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(),
                                                            giou_loss, conf_loss,
                                                            prob_loss, domain_loss, total_loss))
        # update learning rate
        global_steps.assign_add(1)
        if global_steps < warmup_steps:
            lr = global_steps / warmup_steps * 1e-3
        else:
            lr = 1e-6 + 0.5 * (1e-3 - 1e-6) * (
                (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi))
            )
        optimizer.lr.assign(lr.numpy())

        # writing summary data
        with writer.as_default():
            tf.summary.scalar("lr", optimizer.lr, step=global_steps)
            tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
            tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
            tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
            tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
        writer.flush()

def test_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = dayolo(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # optimizing process
        for i in range(len(freeze_layers)):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss

        tf.print("=> TEST STEP %4d   giou_loss: %4.2f   conf_loss: %4.2f   "
                  "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, giou_loss, conf_loss,
                                                             prob_loss, total_loss))

for epoch in range(50):
    for image_data, target in trainset:
        train_step(image_data, target)
    for image_data, target in testset:
        test_step(image_data, target)
    model.save_weights("./checkpoints/dayolo")

      