In [1]:
from google.colab import drive
import os
import sys
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Concatenate, Add, Dot, Multiply, Reshape, Activation, LeakyReLU, BatchNormalization, SimpleRNNCell, RNN, SimpleRNN, LSTM, Embedding, Bidirectional, TimeDistributed, Conv1D, Conv2D, MaxPool1D, MaxPool2D, GlobalMaxPool1D, GlobalMaxPool2D, AveragePooling1D, AveragePooling2D, GlobalAveragePooling1D, GlobalAveragePooling2D, ZeroPadding2D
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import Input, Model, Sequential
from sklearn.model_selection import train_test_split
import cv2
import time
import random
import colorsys
import numpy as np
from tensorflow.keras.regularizers import l2
from google.colab.patches import cv2_imshow

drive.mount("/content/drive", force_remount=True)
try:
    my_path = "/content/notebooks"
    os.symlink("/content/drive/MyDrive/ColabNotebooks/my_env", my_path)
    sys.path.insert(0, my_path)
except:
    pass
os.chdir(my_path)

def set_css():
  display(HTML("""
  <style>
    pre {white-space: pre-wrap;}
  </style>
  """))
get_ipython().events.register("pre_run_cell", set_css)

plt.style.use("dark_background")

Mounted at /content/drive


In [2]:
input_size = 416
batch_size = 4
n_grids = [52, 26, 13]
strides = [416//grid for grid in n_grids]
anchors = [[[10, 13], [16, 30], [33, 23]], [[30, 61], [62, 45], [59, 119]], [[116, 90], [156, 198],
                                                                             [373, 326]]]
anchors = anchors/(np.array(strides).T[:, None, None])
max_bbox_per_scale = 100

init_lr = 1e-4
fin_lr = 1e-6
warmup_epochs = 2
epochs = 30

pref = "/content/drive/My Drive/Computer Vision/racoon_data"
tr_annot_path = pref + "/racoon_train.txt"
test_annot_path = pref + "/racoon_test.txt"

In [3]:
# Define Genralized IoU
def compute_giou(bbox1, bbox2):
    # boxes : (x1, y1, x2, y2)
    bbox1 = np.array(bbox1)
    bbox2 = np.array(bbox2)

    area_bbox1 = (bbox1[2] - bbox1[0])*(bbox1[3] - bbox1[1])
    area_bbox2 = (bbox2[2] - bbox2[0])*(bbox2[3] - bbox2[1])

    pt1_intersec = np.maximum(bbox1[:2], bbox2[:2])
    pt2_intersec = np.minimum(bbox1[2:], bbox2[2:])
    w_intersec, h_intersec = np.maximum(pt2_intersec - pt1_intersec, 0)
    area_intersec = w_intersec*h_intersec

    area_union = area_bbox1 + area_bbox2 - area_intersec

    iou = np.maximum(area_intersec/area_union, np.finfo(np.float32).eps)

    pt1_enclose = np.minimum(bbox1[:2], bbox2[:2])
    pt2_enclose = np.maximum(bbox1[2:], bbox2[2:])
    w_enclose, h_enclose = np.maximum(pt2_enclose - pt1_enclose, 0)
    area_enclose = w_enclose*h_enclose

    return iou - (area_enclose - area_union)/area_enclose

def preprocess_image(img, gt_boxes=None):
    tar_h = input_size
    tar_w = input_size
    h, w, _ = img.shape

    scale = min(tar_h/h, tar_w/w)

    new_w, new_h = int(scale*w), int(scale*h)
    img_resized = cv2.resize(img, dsize=(new_w, new_h))

    img_paded = np.full(shape=(tar_h, tar_w, 3), fill_value=128.)
    pad_w, pad_h = (tar_w - new_w)//2, (tar_h - new_h)//2
    img_paded[pad_h:pad_h+new_h, pad_w:pad_w+new_w, :] = img_resized
    img_paded = img_paded/255.

    if gt_boxes is None:
        return img_paded
    else:
        # (x1, y1, x2, y2)
        gt_boxes[:, (0, 2)] = gt_boxes[:, (0, 2)]*scale + pad_w
        gt_boxes[:, (1, 3)] = gt_boxes[:, (1, 3)]*scale + pad_h
        return img_paded, gt_boxes

In [4]:
class BatchNormalization(BatchNormalization):
    # When `layer.trainable=False` is set(inference mode), the layer is frozen and will use stored
    # moving `var` and `mean` and both `gamma` and `beta` will not be updated.
    def call(self, x, training=False):
        if not training:
            training = tf.constant(False)
        training = tf.logical_and(training, self.trainable)
        return super().call(x, training)

def convolutional(x, filters, kernel_size, downsample=False, activate=True, bn=True):
    if downsample == False:
        strides = 1
        padding = "same"
        z = x
    elif downsample == True:
        # top and left padding
        # shape: (batch_size, h, w, channels) -> (batch_size, h+1, w+1, channels)
        # the image size of the output is half the input.
        z = ZeroPadding2D(padding=((1, 0), (1, 0)))(x)
        strides = 2
        padding = "valid"

    z = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                padding=padding, use_bias=not bn, kernel_regularizer=l2(0.0005),
                kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                bias_initializer=tf.constant_initializer(0.))(z)
    if bn == True:
        z = BatchNormalization()(z)
    if activate == True:
        z = LeakyReLU(alpha=0.1)(z)

    return z

def residual_block(x, filters):
    z = convolutional(x, filters=filters[0], kernel_size=1)
    z = convolutional(z, filters=filters[1], kernel_size=3)

    return Add()([x, z])

def upsample(x):
    return tf.image.resize(images=x, size=(x.shape[1]*2, x.shape[2]*2), method="nearest")

In [5]:
# pred: pred, conv:, label: gt
def compute_loss(pred, conv, label, bboxes, i=0):
    # batch_size  = tf.shape(conv)[0]
    # output_size = n_grids[i]
    # (batch_size, n_grids[i], n_grids[i], 3*(5 + n_clss))
    # -> (batch_size, n_grids[i], n_grids[i], 3, 5 + n_clss)
    conv = tf.reshape(conv, (batch_size, n_grids[i], n_grids[i], 3, 5 + n_clss))
    conv_raw_conf = conv[:, :, :, :, 4]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh     = pred[:, :, :, :, 0:4]
    pred_conf     = pred[:, :, :, :, 4] #시그머이드 반영

    label_xywh    = label[:, :, :, :, 0:4]
    label_conf  = label[:, :, :, :, 4] #있으면 1 아니면 0
    label_prob    = label[:, :, :, :, 5:] #해당 클래스 있는것만 1 나머지 0

    # print(pred_xywh.shape, label_xywh[..., None].shape)
    # giou = compute_giou(pred_xywh, label_xywh)[..., None]
    giou = np.array([compute_giou(pred_xywh, xywh) for xywh in label_xywh])[..., None]
    # input_size = tf.cast(input_size, tf.float32)

    bbox_loss_scale = 2.0 - 1.0*label_xywh[:, :, :, :, 2]*label_xywh[:, :, :, :, 3]/(input_size**2)
    giou_loss = label_conf*bbox_loss_scale*(1 - giou)

    # iou = bbox_iou(pred_xywh[:, :, :, :, None, :], bboxes[:, None, None, None, :, :])
    iou = np.array([compute_giou(pred_xywh[:, :, :, :, None, :], bbox)\
                    for bbox in bboxes[:, None, None, None, :, :]])
    max_iou = tf.reduce_max(iou, axis=-1)[..., None]

    respond_bgd = (1.0 - label_conf)*tf.cast(max_iou < iou_thrsd, tf.float32)
# RETINANET
    # conf_focal = tf.pow(label_conf - pred_conf, 2)
    conf_focal = (label_conf - pred_conf)**2

    conf_loss = conf_focal*(label_conf*tf.nn.sigmoid_cross_entropy_with_logits(labels=label_conf,
                                                                                 logits=conv_raw_conf)
            +respond_bgd*tf.nn.sigmoid_cross_entropy_with_logits(labels=label_conf, logits=conv_raw_conf))

    prob_loss = label_conf*tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))

    return giou_loss, conf_loss, prob_loss

idx2cls = {}
with open("/content/drive/My Drive/Computer Vision/model_data/coco.names", "r") as data:
    for idx, cls in enumerate(data):
        idx2cls[idx] = cls.strip("\n")
n_clss = len(idx2cls)

In [6]:
def YOLOv3(training=False):
    input_size = 416
    channels = 3
    inputs  = Input([input_size, input_size, channels])

    # Darknet 53 from here(totally 75 layers).
    z = convolutional(inputs, filters=32, kernel_size=3)
    z = convolutional(z, filters=64, kernel_size=3, downsample=True)
    for _ in range(1):
        z = residual_block(z, filters=[32, 64])
    z = convolutional(z, filters=128, kernel_size=3, downsample=True)
    for _ in range(2):
        z = residual_block(z, filters=[64, 128])
    z = convolutional(z, filters=256, kernel_size=3, downsample=True)
    for _ in range(8):
        z = residual_block(z, filters=[128, 256])
    route1 = z

    z = convolutional(z, filters=512, kernel_size=3, downsample=True)
    for _ in range(8):
        z = residual_block(z, filters=[256, 512])
    route2 = z

    z = convolutional(z, filters=1024, kernel_size=3, downsample=True)
    for _ in range(4):
        z = residual_block(z, filters=[512, 1024])

    # YOLO v3 from here(totally 31 layers).
    z = convolutional(z, filters=512, kernel_size=1)
    z = convolutional(z, filters=1024, kernel_size=3)
    z = convolutional(z, filters=512, kernel_size=1)
    z = convolutional(z, filters=1024, kernel_size=3)
    z = convolutional(z, filters=512, kernel_size=1)

    conv_lobj_branch = convolutional(z, filters=1024, kernel_size=3)
    # (batch_size, 13, 13, 3*(n_clss + 5))
    conv_lbbox = convolutional(conv_lobj_branch, filters=3*(n_clss + 5), kernel_size=1,
                                activate=False, bn=False)

    z = convolutional(z, filters=256, kernel_size=1)
    z = upsample(z)

    z = Concatenate(axis=-1)([z, route2])

    z = convolutional(z, filters=256, kernel_size=1)
    z = convolutional(z, filters=512, kernel_size=3)
    z = convolutional(z, filters=256, kernel_size=1)
    z = convolutional(z, filters=512, kernel_size=3)
    z = convolutional(z, filters=256, kernel_size=1)

    conv_mobj_branch = convolutional(z, filters=512, kernel_size=3)
    # (batch_size, 26, 26, 3*(n_clss + 5))
    conv_mbbox = convolutional(conv_mobj_branch, filters=3*(n_clss + 5), kernel_size=1,
                                activate=False, bn=False)

    z = convolutional(z, filters=128, kernel_size=1)
    z = upsample(z)

    z = Concatenate(axis=-1)([z, route1])

    z = convolutional(z, filters=128, kernel_size=1)
    z = convolutional(z, filters=256, kernel_size=3)
    z = convolutional(z, filters=128, kernel_size=1)
    z = convolutional(z, filters=256, kernel_size=3)
    z = convolutional(z, filters=128, kernel_size=1)

    conv_sobj_branch = convolutional(z, filters=256, kernel_size=3)
    # (batch_size, 52, 52, 3*(n_clss + 5))
    conv_sbbox = convolutional(conv_sobj_branch, filters=3*(n_clss + 5), kernel_size=1,
                                activate=False, bn=False)

    outputs = []
    for i, conv_bbox in enumerate([conv_sbbox, conv_mbbox, conv_lbbox]):
        if training == True:
            outputs.append(conv_bbox)
        # (batch_size, output_size, output_size, 3*(5 + n_clss)
        # batch_size = tf.shape(conv_bbox)[0]
        output_size = n_grids[i]
        conv_bbox = tf.reshape(conv_bbox, shape=(batch_size, output_size, output_size, 3, 5 + n_clss))
        delta_xy = conv_bbox[:, :, :, :, 0:2]   
        delta_wh = conv_bbox[:, :, :, :, 2:4]
        conf = conv_bbox[:, :, :, :, 4:5]
        probs = conv_bbox[:, :, :, :, 5: ] 

        y = tf.range(output_size, dtype=tf.int32)
        y = tf.expand_dims(y, -1)
        y = tf.tile(y, [1, output_size])
        x = tf.range(output_size,dtype=tf.int32)
        x = tf.expand_dims(x, 0)
        x = tf.tile(x, [output_size, 1])
        xy_grid = tf.concat([x[:, :, None], y[:, :, None]], axis=-1)
        # (output_size, output_size, 2) -> (batch_size, output_size, output_size, 3, 2)
        xy_grid = tf.tile(xy_grid[None, :, :, None, :], [batch_size, 1, 1, 3, 1])
        xy_grid = tf.cast(xy_grid, tf.float32)

        # The center of the predicted bboxes in the original 416x416 image space.
        xy = (tf.math.sigmoid(delta_xy) + xy_grid)*strides[i]
        wh = (tf.math.exp(delta_wh)*anchors[i])*strides[i]
        # xywh = tf.concat([xy, wh], axis=-1)
        conf = tf.math.sigmoid(conf)
        probs = tf.math.sigmoid(probs)
                
        # [(batch_size, 52, 52, 3, 85), (batch_size, 26, 26, 3 85), (batch_size, 13, 13, 3, 85)]
        outputs.append(tf.concat([xy, wh, conf, probs], axis=-1))
        # outputs.append(tf.concat([xywh, conf, probs], axis=-1))

    return Model(inputs=inputs, outputs=outputs)

In [7]:
class Dataset(object):
    # Dataset preprocess implementation
    def __init__(self, dataset_type):
        # (x, y, w, h)
        self.annot_path = tr_annot_path if dataset_type == "train" else test_annot_path
        self.data_aug = True if dataset_type == "train" else False

        with open(tr_annot_path, "r") as f:
            txt = f.readlines()
            annots = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
            random.shuffle(annots)
        self.final_annots = list()
        for annot in annots:
            img_path, bbox = annot.split()
            self.final_annots.append([img_path, [bbox]])

        self.n_samples = len(self.final_annots)
        self.n_batchs = int(np.ceil(self.n_samples/batch_size))
        self.batch_count = 0

    def __iter__(self):
        return self

    def __next__(self):
        # self.tr_input_size = random.choice([self.tr_input_sizes])

        batch_img = np.zeros((batch_size, 416, 416, 3), dtype=np.float32)

        batch_label_sbbox = np.zeros((batch_size, n_grids[0], n_grids[0],
                                        3, 5 + n_clss), dtype=np.float32)
        batch_label_mbbox = np.zeros((batch_size, n_grids[1], n_grids[1],
                                        3, 5 + n_clss), dtype=np.float32)
        batch_label_lbbox = np.zeros((batch_size, n_grids[2], n_grids[2],
                                        3, 5 + n_clss), dtype=np.float32)

        batch_sbboxes = np.zeros((batch_size, max_bbox_per_scale, 4), dtype=np.float32)
        batch_mbboxes = np.zeros((batch_size, max_bbox_per_scale, 4), dtype=np.float32)
        batch_lbboxes = np.zeros((batch_size, max_bbox_per_scale, 4), dtype=np.float32)

        num = 0
        if self.batch_count < self.n_batchs:
            while num < batch_size:
                idx = self.batch_count*batch_size + num
                if idx >= self.n_samples:
                    idx -= self.n_samples
                annot = self.final_annots[idx]
                # Parse annotation
                img = cv2.imread(pref + annot[0][1:])
                bboxes = np.array([list(map(int, box.split(','))) for box in annot[1]])
                # print(bboxes)
                if self.data_aug:
                    img, bboxes = self.random_horizontal_flip(img, bboxes)
                    img, bboxes = self.random_crop(img, bboxes)
                    img, bboxes = self.random_translate(img, bboxes)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img, bboxes = preprocess_image(img=img, gt_boxes=bboxes)

                # Preprocess GT bboxes.
                # def preprocess_true_boxes(self, bboxes):
                label = [np.zeros((n_grids[i], n_grids[i], 3,
                                5 + n_clss)) for i in range(3)]
                bboxes_xywh = [np.zeros((max_bbox_per_scale, 4)) for _ in range(3)]
                bbox_count = np.zeros((3,))

                for bbox in bboxes:
                    bbox_coor = bbox[:4]
                    bbox_class_ind = bbox[4]

                    onehot = np.zeros(n_clss, dtype=np.float)
                    onehot[bbox_class_ind] = 1.0
                    uniform_distribution = np.full(n_clss, 1.0 / n_clss)
                    deta = 0.01
                    smooth_onehot = onehot*(1 - deta) + deta*uniform_distribution

                    bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2])*0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
                    bbox_xywh_scaled = 1.0*bbox_xywh[None, :] / strides[:, None]

                    iou = []
                    exist_positive = False
                    for i in range(3):
                        anchors_xywh = np.zeros((3, 4))
                        anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                        anchors_xywh[:, 2:4] = anchors[i]

                        # iou_scale = bbox_iou(bbox_xywh_scaled[i][None, :], anchors_xywh)
                        # iou_scale = compute_giou(bbox_xywh_scaled[i][None, :], anchors_xywh)
                        iou_scale = np.array([compute_giou(bbox_xywh_scaled[i][None, :][0], anchor_xywh)\
                                                 for anchor_xywh in anchors_xywh])
                        iou.append(iou_scale)
                        iou_mask = iou_scale > 0.3

                        if np.any(iou_mask):
                            xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)

                            label[i][yind, xind, iou_mask, :] = 0
                            label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
                            label[i][yind, xind, iou_mask, 4:5] = 1.0
                            label[i][yind, xind, iou_mask, 5:] = smooth_onehot

                            bbox_ind = int(bbox_count[i] % max_bbox_per_scale)
                            bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                            bbox_count[i] += 1

                            exist_positive = True

                    if not exist_positive:
                        best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
                        best_detect = int(best_anchor_ind / 3)
                        best_anchor = int(best_anchor_ind % 3)
                        xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

                        label[best_detect][yind, xind, best_anchor, :] = 0
                        label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
                        label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
                        label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
                        
                        bbox_ind = int(bbox_count[best_detect] % max_bbox_per_scale)
                        bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                        bbox_count[best_detect] += 1

                label_sbbox, label_mbbox, label_lbbox = label
                sbboxes, mbboxes, lbboxes = bboxes_xywh
                    # return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes


                # label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes =\
                # self.preprocess_true_boxes(bboxes)

                batch_img[num, :, :, :] = img
                batch_label_sbbox[num, :, :, :, :] = label_sbbox
                batch_label_mbbox[num, :, :, :, :] = label_mbbox
                batch_label_lbbox[num, :, :, :, :] = label_lbbox
                batch_sbboxes[num, :, :] = sbboxes
                batch_mbboxes[num, :, :] = mbboxes
                batch_lbboxes[num, :, :] = lbboxes
                num += 1
            self.batch_count += 1
            batch_smaller_target = batch_label_sbbox, batch_sbboxes
            batch_medium_target  = batch_label_mbbox, batch_mbboxes
            batch_larger_target  = batch_label_lbbox, batch_lbboxes

            return batch_img, (batch_smaller_target, batch_medium_target, batch_larger_target)
        else:
            self.batch_count = 0
            np.random.shuffle(self.final_annots)
            raise StopIteration

    def random_horizontal_flip(self, img, bboxes):
        # With 50% probability.
        if random.random() < 0.5:
            _, w, _ = img.shape
            img = img[:, ::-1, :]
            bboxes[:, [0, 2]] = w - bboxes[:, [2, 0]]

        return img, bboxes

    def random_crop(self, img, bboxes):
        if random.random() < 0.5:
            h, w, _ = img.shape
            # bboxes: (x1, y1, x2, y2)
            # cv2_imshow(img)
            # print(bboxes)
            enclose = np.concatenate([np.min(bboxes[:, 0:2], axis=0),
                                      np.max(bboxes[:, 2:4], axis=0)], axis=-1)
            
            max_l_trans = enclose[0]
            max_u_trans = enclose[1]
            max_r_trans = w - enclose[2]
            max_d_trans = h - enclose[3]

            crop_xmin = max(0, int(enclose[0] - random.uniform(0, max_l_trans)))
            crop_ymin = max(0, int(enclose[1] - random.uniform(0, max_u_trans)))
            crop_xmax = max(w, int(enclose[2] + random.uniform(0, max_r_trans)))
            crop_ymax = max(h, int(enclose[3] + random.uniform(0, max_d_trans)))

            img = img[crop_ymin:crop_ymax, crop_xmin:crop_xmax, :]

            bboxes[:, (0, 2)] = bboxes[:, (0, 2)] - crop_xmin
            bboxes[:, (1, 3)] = bboxes[:, (1, 3)] - crop_ymin

        return img, bboxes

    def random_translate(self, img, bboxes):
        if random.random() < 0.5:
            h, w, _ = img.shape
            enclose = np.concatenate([np.min(bboxes[:, 0:2], axis=0),
                                      np.max(bboxes[:, 2:4], axis=0)], axis=-1)

            max_l_trans = enclose[0]
            max_u_trans = enclose[1]
            max_r_trans = w - enclose[2]
            max_d_trans = h - enclose[3]

            tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
            ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))

            img = cv2.warpAffine(src=img, M=np.array([[1, 0, tx], [0, 1, ty]]), dsize=(w, h))

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty

        return img, bboxes

    def __len__(self):
        return self.n_batchs

trainset = Dataset("train")

In [8]:
global train_from_checkpoint

save_best_only = True
# Saves all the best validated checkpoints in training process.(This may require a lot of disk spaces.)
save_checkpoints = False

trainset = Dataset("train")
testset = Dataset("test")

steps_per_epoch = len(trainset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = warmup_epochs*steps_per_epoch
tot_steps = epochs*steps_per_epoch

model = YOLOv3(training=True)

# model.summary()

In [9]:
trainset = Dataset("train")
transfer_learning = False
if transfer_learning:
    # Transfer learning from Darknet 53 weights.
    # Resets all state generated by Keras.
    tf.keras.backend.clear_session()

    with open("/content/drive/My Drive/Computer Vision/model_data/yolov3.weights", "rb") as f:
        _, _, _, _, _ = np.fromfile(f, dtype=np.int32, count=5)

        j = 0
        for i in range(75):
            conv_layer = model.get_layer("conv2d" if i == 0 else f"conv2d_{i}")
            filters = conv_layer.filters
            kernel_size = conv_layer.kernel_size[0]
            in_dim = conv_layer.input_shape[-1]

            if i not in [58, 66, 74]:
                # order: [beta, gamma, mean, variance](darknet) -> [gamma, beta, mean, variance](tf)
                bn_weights = np.fromfile(f, dtype=np.float32, count=4*filters).reshape((4, filters))[[1, 0, 2, 3]]
                bn_layer = model.get_layer("batch_normalization" if j == 0 else f"batch_normalization_{j}")
                j += 1
            else:
                conv_bias = np.fromfile(f, dtype=np.float32, count=filters)

            conv_shape = (filters, in_dim, kernel_size, kernel_size)
            # shape: (out_dim, in_dim, h, w) -> (h, w, in_dim, out_dim)
            conv_weights = np.fromfile(f, dtype=np.float32, count=np.prod(conv_shape)).reshape(conv_shape)\
            .transpose((2, 3, 1, 0))

            if i not in [58, 66, 74]:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

# if train_from_checkpoint:
#     model.load_weights("./checkpoints/yolov3_custom")

opt = tf.keras.optimizers.Adam()
best_val_loss = 1000 # should be large at start
for epoch in range(epochs):
    for img_data, target in trainset:
        with tf.GradientTape() as tape:
            pred_result = model(img_data, training=True)
            giou_loss = conf_loss = prob_loss=0
            # optimizing process
            for i in range(3):
                conv, pred = pred_result[i*2], pred_result[i*2 + 1]
                loss_items = compute_loss(pred, conv,*target[i], i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]
            tot_loss = giou_loss + conf_loss + prob_loss
            gras = tape.gradient(tot_loss, yolo.trainable_variables)
            opt.apply_gradients(zip(grads, yolo.trainable_variables))

            # update learning rate
            global_steps.assign_add(1)
            if global_steps < warmup_steps:# and not TRAIN_TRANSFER:
                lr = global_steps / warmup_steps*init_lr
            else:
                lr = fin_lr + 0.5*(init_lr - fin_lr)*(
                    (1 + tf.cos((global_steps - warmup_steps) / (tot_steps - warmup_steps)*np.pi)))
            optimizer.lr.assign(lr.numpy())

        results = global_steps.numpy(), opt.lr.numpy(), giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), tot_loss.numpy()
        cur_step = results[0]%steps_per_epoch
        print(f"epoch:{epoch:2.0f} step:{cur_step:5.0f}/{steps_per_epoch}, lr:{results[1]:.6f},\
        giou_loss:{results[2]:7.2f}, conf_loss:{results[3]:7.2f}, prob_loss:{results[4]:7.2f},\
        tot_loss:{results[5]:7.2f}")

    count, giou_val, conf_val, prob_val, tot_val = 0., 0, 0, 0, 0
    for img_data, target in testset:
        with tf.GradientTape() as tape:
            pred_result = model(img_data, training=False)
            giou_loss=conf_loss=prob_loss=0

            # optimizing process
            for i in range(3):
                conv, pred = pred_result[i*2], pred_result[i*2+1]
                loss_items = compute_loss(pred, conv,*target[i], i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]
            tot_loss = giou_loss + conf_loss + prob_loss

        results = giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), tot_loss.numpy()
        count += 1
        giou_val += results[0]
        conf_val += results[1]
        prob_val += results[2]
        tot_val += results[3]
        
    print(f"\n\ngiou_val_loss:{giou_val/count:7.2f}, conf_val_loss:{conf_val/count:7.2f},\
    prob_val_loss:{prob_val/count:7.2f}, tot_val_loss:{tot_val/count:7.2f}\n\n.")

    if save_best_only and best_val_loss > tot_val/count: 
        model.save_weights("/content/drive/My Drive/Computer Vision/checkpoints/yolov3_custom")
        best_val_loss = tot_val/count

TypeError: ignored

In [None]:
YOLOv3(training=True)

In [None]:
ID = random.randint(0, 200)
# model.load_weights("./checkpoints/yolov3_custom")
ori_img = cv2.imread("/content/drive/My Drive/Computer Vision/Allhydrants-1920x1080-ca61f9ea607efb2f02f1ef97b781ee0f.jpg")

img_paded = preprocess_image(img=ori_img)
# Add a dimension for batch size.
img_paded = img_paded[None, ...]

pred_bboxes = model.predict(img_paded)
pred_bboxes = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bboxes]
# [(52*52*3, 85), (26*26*3, 85), (13*13*3, 85)] -> ((52*52*3 + 26*26*3 + 13*13*3), 85)
pred_bboxes = tf.concat(pred_bboxes, axis=0)

coors = pred_bboxes[:, :4]
confs = pred_bboxes[:, 4]
probs = pred_bboxes[:, 5:]

# shape: (x, y, w, h) -> (x1, y1, x2, y2)
coors = np.concatenate([coors[:, :2] - coors[:, 2:]*0.5,
                        coors[:, :2] + coors[:, 2:]*0.5], axis=-1)

# (x1, y1, x2, y2) -> (x1_ori, y1_ori, x2_ori, y2_ori)
ori_h, ori_w = ori_img.shape[:2]
resize_ratio = min(input_size/ori_w, input_size/ori_h)

pad_w = (input_size - resize_ratio*ori_w)/2
pad_h = (input_size - resize_ratio*ori_h)/2

coors[:, (0, 2)] = (coors[:, (0, 2)] - pad_w)/resize_ratio
coors[:, (1, 3)] = (coors[:, (1, 3)] - pad_h)/resize_ratio

# Discard bboxes with larger (x2, y2) than (x1, y1).
coors[:, :4] = np.concatenate([np.maximum(coors[:, (0, 1)], 0),
                            np.minimum(coors[:, (2, 3)], [ori_w-1, ori_h-1])], axis=-1)
coors[np.logical_or((coors[:, 0] > coors[:, 2]), (coors[:, 1] > coors[:, 3]))] = 0

# Discard bboxes with negative areas.
areas = np.sqrt(np.multiply.reduce(coors[:, (2, 3)] - coors[:, (0, 1)], axis=-1))
scale_mask = np.logical_and((areas > 0), (areas < np.inf))

# Discard bboxes with scores less than 0.3
argmax = np.argmax(probs, axis=-1)
scores = confs*np.max(probs, axis=-1)
score_mask = scores > 0.3

pick_mask = np.logical_and(scale_mask, score_mask)
coors, scores, argmax = coors[pick_mask], scores[pick_mask], argmax[pick_mask]
bboxes = np.concatenate([coors, scores[:, None], argmax[:, None]], axis=-1)

# Perform non_maximum_suppression
# bboxes = np.array(bboxes)
clss_in_img = list(set(bboxes[:, 5]))
best_bboxes = []
for cls in clss_in_img:
    bboxes_cls = bboxes[bboxes[:, 5] == cls]
    # Process 1: Determine whether the number of bounding boxes is greater than 0 
    while len(bboxes_cls) > 0:
        # Process 2: Select the bounding box with the highest score according to socre order A
        argmax = np.argmax(bboxes_cls[:, 4])
        best_bbox = bboxes_cls[argmax]
        best_bboxes.append(best_bbox)

        bboxes_cls = np.delete(bboxes_cls, argmax, axis=0)

        # Process 3: Calculate this bounding box A and remain all iou of the bounding box and remove
        # those bounding boxes whose iou value is higher than the thrsd.
        ious = np.array([compute_giou(best_bbox[:4], bbox_cls[:4]) for bbox_cls in bboxes_cls])

        bboxes_cls = bboxes_cls*(ious <= 0.45)[:, None]
        bboxes_cls = bboxes_cls[bboxes_cls[:, 4] > 0]

bboxes = best_bboxes
# Draw bboxes
img_h, img_w, _ = ori_img.shape

hsv_tuples = [(idx/n_clss, 1, 1) for idx in idx2cls.keys()]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(map(lambda x: (int(x[0]*255.), int(x[1]*255.), int(x[2]*255.)), colors))

random.seed(0)
random.shuffle(colors)
random.seed(None)

for bbox in bboxes:
    coor = np.array(bbox[:4], dtype=np.int32)
    score = bbox[4]
    cls_idx = int(bbox[5])
    bbox_color = colors[cls_idx]
    bbox_thk = int(0.6*(img_h + img_w)/1000)
    bbox_thk = 1 if bbox_thk < 1 else bbox_thk
    font_scale = 0.75*bbox_thk
    (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3])

    cv2.rectangle(img=ori_img, pt1=(x1, y1), pt2=(x2, y2), color=bbox_color, thickness=bbox_thk*2)

    score_str = f"{score:.1%}" 
    label = f"{idx2cls[cls_idx]} " + score_str

    (text_w, text_h), baseline = cv2.getTextSize(text=label, fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                                        fontScale=font_scale, thickness=bbox_thk)
    cv2.rectangle(img=ori_img, pt1=(x1, y1), pt2=(x1+text_w, y1+text_h+baseline),
                color=bbox_color, thickness=cv2.FILLED)
    cv2.putText(img=ori_img, text=label, org=(x1, y1+12), fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                fontScale=font_scale, color=(0, 0, 0), thickness=bbox_thk, lineType=cv2.LINE_AA)

cv2_imshow(ori_img)