In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D, BatchNormalization, MaxPool2D
from tensorflow import keras

from absl.flags import FLAGS
from absl import app, flags
from pathlib import Path
import logging
import yaml
import math
import numpy as np
LOGGER = logging.getLogger(__name__)
import os
import matplotlib.pyplot as plt
import glob
import time
import pickle
import cv2
import pynvml 
from tensorflow.python.client import device_lib
import math
import pandas as pd
device_lib.list_local_devices()
import shutil
import pathlib
import random
import sys
import colorsys
import copy

ModuleNotFoundError: No module named 'yaml'

In [None]:
class Conv2d(keras.layers.Layer):
    """'VALID'-padded 2-D convolution, optionally initialised from a source layer.

    Args:
        c1: Input channel count. Not used by this layer; kept in the
            signature for positional compatibility.
        c2: Number of output filters.
        k: Kernel size.
        s: Stride.
        g: Groups; only ``1`` is accepted.
        bias: Whether the convolution carries a bias term.
        w: Optional object exposing ``weight`` (and, when ``bias`` is true,
            ``bias``) whose values seed the kernel/bias. Presumably a PyTorch
            conv module (``permute``/``numpy`` are called on it) -- confirm
            against callers. When ``None`` the Keras default initialisers
            are used instead of failing on attribute access.
    """

    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
        super(Conv2d, self).__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"

        init_kwargs = {}
        if w is not None:
            # Reorder the source kernel axes with permute(2, 3, 1, 0) before
            # handing it to Keras as a constant initialiser.
            init_kwargs['kernel_initializer'] = keras.initializers.Constant(
                w.weight.permute(2, 3, 1, 0).numpy())
            init_kwargs['bias_initializer'] = (
                keras.initializers.Constant(w.bias.numpy()) if bias else None)

        self.conv = keras.layers.Conv2D(c2, k, s, 'VALID', use_bias=bias, **init_kwargs)

    def call(self, inputs):
        """Apply the wrapped convolution to ``inputs``."""
        return self.conv(inputs)


class LeakyRelu(object):
    """Stateless callable that applies ``tf.nn.leaky_relu`` with its default slope."""

    def __call__(self, x):
        # No configuration is stored; the call is forwarded unchanged.
        activated = tf.nn.leaky_relu(x)
        return activated

class Conv(Layer):
    """Bias-free convolution followed by batch normalisation and leaky ReLU.

    Args:
        filters: Number of output filters.
        kernel_size: Convolution kernel size.
        strides: Convolution stride.
        padding: Padding mode forwarded to ``Conv2D``.
        groups: Group count forwarded to ``Conv2D``.
    """

    def __init__(self, filters, kernel_size, strides, padding='SAME', groups=1):
        super(Conv, self).__init__()
        weight_init = tf.random_normal_initializer(stddev=0.01)
        weight_decay = tf.keras.regularizers.L2(5e-4)
        self.conv = Conv2D(
            filters, kernel_size, strides, padding,
            groups=groups,
            use_bias=False,
            kernel_initializer=weight_init,
            kernel_regularizer=weight_decay,
        )
        self.bn = BatchNormalization()
        self.activation = LeakyRelu()

    def call(self, x):
        """Run convolution, then normalisation, then the activation."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.activation(normalised)

class Focus(Layer):
    """Interleaved 2x sub-sampling concatenated on the last axis, then a ``Conv``.

    Args:
        filters: Output filters of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Stride of the convolution.
        padding: Padding mode of the convolution.
    """

    def __init__(self, filters, kernel_size, strides=1, padding='SAME'):
        super(Focus, self).__init__()
        self.conv = Conv(filters, kernel_size, strides, padding)

    def call(self, x):
        """Stack the four even/odd phase slices of ``x`` and convolve them."""
        # Names give the phase (even/odd start) on the third-from-last and
        # second-from-last axes respectively; the last axis is kept whole.
        even_even = x[..., ::2, ::2, :]
        odd_even = x[..., 1::2, ::2, :]
        even_odd = x[..., ::2, 1::2, :]
        odd_odd = x[..., 1::2, 1::2, :]
        stacked = tf.concat([even_even, odd_even, even_odd, odd_odd], axis=-1)
        return self.conv(stacked)

    
class Bottleneck(Layer):
    """1x1 ``Conv`` followed by a 3x3 ``Conv``, with an optional residual add.

    Args:
        units: Output filters of the second convolution.
        shortcut: When truthy, the input is added to the block output.
        expansion: Ratio applied to ``units`` for the first convolution.
    """

    def __init__(self, units, shortcut=True, expansion=0.5):
        super(Bottleneck, self).__init__()
        hidden_units = int(units * expansion)
        self.conv1 = Conv(hidden_units, 1, 1)
        self.conv2 = Conv(units, 3, 1)
        self.shortcut = shortcut

    def call(self, x):
        """Return ``conv2(conv1(x))``, plus ``x`` when the shortcut is enabled."""
        transformed = self.conv2(self.conv1(x))
        return x + transformed if self.shortcut else transformed


class BottleneckCSP(Layer):
    """Two-branch block: a bottleneck stack and a 1x1 bypass, merged and fused.

    Args:
        units: Output filters of the final ``Conv``.
        n_layer: Number of ``Bottleneck`` blocks in the main branch.
        shortcut: Forwarded to every ``Bottleneck``.
        expansion: Ratio applied to ``units`` for the hidden width.
    """

    def __init__(self, units, n_layer=1, shortcut=True, expansion=0.5):
        super(BottleneckCSP, self).__init__()
        hidden = int(units * expansion)

        def plain_conv():
            # A fresh initializer per layer, exactly as in the inline form.
            return Conv2D(hidden, 1, 1, use_bias=False,
                          kernel_initializer=tf.random_normal_initializer(stddev=0.01))

        self.conv1 = Conv(hidden, 1, 1)
        self.conv2 = plain_conv()
        self.conv3 = plain_conv()
        self.conv4 = Conv(units, 1, 1)
        # NOTE(review): Keras `momentum` is the moving-average decay; 0.03
        # looks like a PyTorch-style value -- confirm it is intended.
        self.bn = BatchNormalization(momentum=0.03)
        self.activation = LeakyRelu()
        stack = []
        for _ in range(n_layer):
            stack.append(Bottleneck(hidden, shortcut, expansion=1.0))
        self.modules = tf.keras.Sequential(stack)

    def call(self, x):
        """Merge the bottleneck branch with the bypass branch and fuse them."""
        main_branch = self.conv3(self.modules(self.conv1(x)))
        bypass_branch = self.conv2(x)
        merged = tf.concat([main_branch, bypass_branch], axis=-1)
        return self.conv4(self.activation(self.bn(merged)))


class C3(Layer):
    """Bottleneck stack and 1x1 bypass over a shared stem, merged and fused.

    Args:
        units: Filters of the stem, the bypass and the final ``Conv``.
        n_layer: Number of ``Bottleneck`` blocks in the main branch.
        shortcut: Forwarded to every ``Bottleneck``.
        expansion: Accepted for signature parity; not used (the hidden width
            equals ``units``).
    """

    def __init__(self, units, n_layer=1, shortcut=False, expansion=0.5):
        super(C3, self).__init__()
        hidden = int(units)  # hidden channels
        self.conv1 = Conv(hidden, 1, 1)
        self.conv2 = Conv2D(
            hidden, 1, 1,
            use_bias=False,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        )
        self.conv3 = Conv(units, 1, 1)
        self.bn = BatchNormalization()
        self.activation = LeakyRelu()
        stack = []
        for _ in range(n_layer):
            stack.append(Bottleneck(hidden, shortcut, expansion=1.0))
        self.modules = tf.keras.Sequential(stack)

    def call(self, x):
        """Feed the stem output to both branches, then concat, normalise and fuse."""
        stem = self.conv1(x)
        main_branch = self.modules(stem)
        bypass_branch = self.conv2(stem)
        merged = tf.concat([main_branch, bypass_branch], axis=-1)
        return self.conv3(self.activation(self.bn(merged)))


class SPP(Layer):
    """Spatial pyramid pooling: parallel stride-1 max-pools concatenated with the input.

    Args:
        units: Output filters of the final ``Conv``; the first ``Conv`` uses
            ``units // 2``.
        kernels: Pool sizes, one ``MaxPool2D`` per entry.
    """

    def __init__(self, units, kernels=(5, 9, 13)):
        super(SPP, self).__init__()
        half_units = units // 2  # Todo:
        self.conv1 = Conv(half_units, 1, 1)
        self.conv2 = Conv(units, 1, 1)
        pools = []
        for size in kernels:
            # Todo: padding check
            pools.append(MaxPool2D(pool_size=size, strides=1, padding='SAME'))
        self.modules = pools

    def call(self, x):
        """Reduce channels, pool at every kernel size, concatenate and fuse."""
        x = self.conv1(x)
        branches = [x]
        for pool in self.modules:
            branches.append(pool(x))
        return self.conv2(tf.concat(branches, axis=-1))
    
class SPPF(Layer):
    """Spatial pyramid pooling - Fast: one max-pool applied three times in series.

    The previous constructor passed ``(c1, c_, 1, 1, w=...)`` to ``Conv``,
    whose signature in this file is ``(filters, kernel_size, strides, ...)``
    with no ``w`` keyword, so it could never be instantiated. The
    convolutions are now built with this file's ``Conv`` convention.

    Args:
        c1: Input channel count; only used to derive the hidden width
            ``c1 // 2``.
        c2: Output filters of the final convolution.
        k: Pool size of the shared ``MaxPool2D``.
        w: Accepted for signature compatibility and ignored; ``Conv`` in this
            file cannot be seeded from pre-trained weights.
    """

    def __init__(self, c1, c2, k=5, w=None):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c_, 1, 1)
        self.cv2 = Conv(c2, 1, 1)
        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')

    def call(self, inputs):
        """Concatenate the reduced input with its 1x, 2x and 3x pooled versions."""
        x = self.cv1(inputs)
        y1 = self.m(x)
        y2 = self.m(y1)
        y3 = self.m(y2)
        return self.cv2(tf.concat([x, y1, y2, y3], axis=-1))

class SPPCSP(Layer):
    """Cross-stage-partial block whose main branch contains a pooling pyramid.

    Args:
        units: Output filters of the final ``Conv``.
        n: Accepted but not used by this implementation.
        shortcut: Accepted but not used by this implementation.
        expansion: Hidden width is ``int(2 * units * expansion)``.
        kernels: Pool sizes, one stride-1 ``MaxPool2D`` per entry.
    """

    def __init__(self, units, n=1, shortcut=False, expansion=0.5, kernels=(5, 9, 13)):
        super(SPPCSP, self).__init__()
        hidden = int(2 * units * expansion)
        self.conv1 = Conv(hidden, 1, 1)
        self.conv2 = Conv2D(
            hidden, 1, 1,
            use_bias=False,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        )
        self.conv3 = Conv(hidden, 3, 1)
        self.conv4 = Conv(hidden, 1, 1)
        pools = []
        for size in kernels:
            pools.append(MaxPool2D(pool_size=size, strides=1, padding='same'))
        self.modules = pools
        self.conv5 = Conv(hidden, 1, 1)
        self.conv6 = Conv(hidden, 3, 1)
        self.bn = BatchNormalization()
        self.act = LeakyRelu()
        self.conv7 = Conv(units, 1, 1)

    def call(self, x):
        """Run the pyramid branch and the 1x1 bypass, then merge and fuse."""
        stem = self.conv4(self.conv3(self.conv1(x)))
        pyramid = [stem]
        for pool in self.modules:
            pyramid.append(pool(stem))
        main_branch = self.conv6(self.conv5(tf.concat(pyramid, axis=-1)))
        bypass_branch = self.conv2(x)
        merged = tf.concat([main_branch, bypass_branch], axis=-1)
        return self.conv7(self.act(self.bn(merged)))


class Upsample(Layer):
    """Resize axes 1 and 2 of the input by an integer ratio via ``tf.image.resize``.

    Args:
        i: Accepted but not used.
        ratio: Multiplier applied to the sizes of axes 1 and 2.
        method: Interpolation method name passed to ``tf.image.resize``.
    """

    def __init__(self, i=None, ratio=2, method='bilinear'):
        super(Upsample, self).__init__()
        self.ratio = ratio
        self.method = method

    def call(self, x):
        """Return ``x`` resized to ``ratio`` times its current axis-1/axis-2 size."""
        target_rows = tf.shape(x)[1] * self.ratio
        target_cols = tf.shape(x)[2] * self.ratio
        return tf.image.resize(x, (target_rows, target_cols), method=self.method)


class Concat(Layer):
    """Layer form of ``tf.concat`` along a fixed axis.

    Args:
        dims: Axis along which the tensors are joined.
    """

    def __init__(self, dims=-1):
        super(Concat, self).__init__()
        self.dims = dims

    def call(self, x):
        """Join the tensors in ``x`` along ``self.dims``."""
        joined = tf.concat(x, self.dims)
        return joined

class Final_layer(keras.layers.Layer):
    """Detection head that returns the raw 1x1-convolution output of each scale.

    Args:
        nc: Number of classes.
        anchors: One flat sequence of anchor values per detection scale;
            must be non-empty because ``anchors[0]`` is read.
        ch: Accepted but not used.
        imgsz: Stored on the instance; not used by ``call``.
        w: Accepted but not used.
    """

    def __init__(self, nc=20, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
        super(Final_layer, self).__init__()
        self.stride = np.array([8, 16, 32], np.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: x, y, w, h, confidence + classes
        self.nl = len(anchors)  # number of detection scales
        self.na = len(anchors[0]) // 2  # anchors per scale
        self.grid = [tf.zeros(1)] * self.nl  # placeholder grids
        # Anchors regrouped as (scale, anchor, 2).
        self.anchors = tf.cast(tf.reshape(anchors, [self.nl, -1, 2]), tf.float32)
        heads = []
        for _ in range(self.nl):
            heads.append(Conv2D(self.no * self.na, 1, use_bias=False))
        self.m = heads
        self.training = False  # set to False after building model
        self.imgsz = imgsz

    def call(self, inputs):
        """Apply the i-th head to the i-th input and return the list of results."""
        # Each result has ``na * no`` channels on its last axis.
        return [self.m[idx](inputs[idx]) for idx in range(self.nl)]

class Detect(keras.layers.Layer):
    """Detection head that decodes each scale's prediction into box values.

    Args:
        nc: Number of classes.
        anchors: One flat sequence of anchor values per detection scale;
            must be non-empty because ``anchors[0]`` is read.
        ch: Accepted but not used.
        imgsz: Stored on the instance; not used by ``call``.
        w: Accepted but not used.
    """

    def __init__(self, nc=20, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
        super(Detect, self).__init__()
        self.stride = np.array([8, 16, 32], np.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: x, y, w, h, confidence + classes
        self.nl = len(anchors)  # number of detection scales
        self.na = len(anchors[0]) // 2  # anchors per scale
        self.grid = [tf.zeros(1)] * self.nl  # placeholder grids
        # Anchors regrouped as (scale, anchor, 2).
        self.anchors = tf.cast(tf.reshape(anchors, [self.nl, -1, 2]), tf.float32)
        self.m = [Conv2D(self.no * self.na, 1, use_bias=False) for _ in range(self.nl)]
        self.training = False  # set to False after building model
        self.imgsz = imgsz

    def call(self, inputs):
        """Decode every scale and return the list of decoded tensors.

        Each entry is reshaped to ``(-1, rows, cols, na, no)`` and holds
        ``[xy, wh, confidence, classes]`` on its last axis. Nothing is
        decoded (an empty list is returned) while ``self.training`` is true.
        """
        z = []  # inference output
        for i in range(self.nl):
            # Evaluate the head once and reuse the result; it used to be
            # invoked three times per scale.
            raw = self.m[i](inputs[i])
            _, grid_rows, grid_cols, _ = raw.shape
            raw = tf.reshape(raw, (-1, grid_rows, grid_cols, self.na, self.no))
            if not self.training:  # inference
                y = tf.sigmoid(raw)  # squash every output into (0, 1)

                # meshgrid's default 'xy' indexing yields arrays of shape
                # (len(second), len(first)), so the axis-2 range goes first to
                # obtain a (rows, cols) grid whose channel 0 is the column
                # index. Identical to the old argument order on square grids,
                # and also shape-correct on non-square ones.
                grid_xy = tf.meshgrid(tf.range(grid_cols), tf.range(grid_rows))
                grid_xy = tf.cast(tf.expand_dims(tf.stack(grid_xy, axis=-1), axis=2), tf.float32)

                # Split the last axis into 2 (xy), 2 (wh), 1 (conf), nc (classes).
                xy, wh, conf, classes = tf.split(y, (2, 2, 1, self.nc), axis=-1)
                pred_xy = (xy * 2. - 0.5 + grid_xy) * self.stride[i]  # decode pred to xywh
                pred_wh = (wh * 2) ** 2 * self.anchors[i] * self.stride[i]
                z.append(tf.concat([pred_xy, pred_wh, conf, classes], axis=-1))
        return z

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float32 ``(1, 1, ny * nx, 2)`` grid of ``(x, y)`` cell indices."""
        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
    
class AgnosticNMS(keras.layers.Layer):
    """Class-agnostic non-max suppression applied per batch element."""

    def call(self, input, topk_all, iou_thres, conf_thres):
        """Run ``_nms`` on every element of ``input`` via ``tf.map_fn``.

        Args:
            input: Structure mapped over by ``tf.map_fn``; each element is
                unpacked by ``_nms`` as ``(boxes, classes, scores)``.
            topk_all: Maximum number of detections kept per element.
            iou_thres: IoU threshold for suppression.
            conf_thres: Minimum score for a box to be kept.

        Returns:
            Tuple ``(padded_boxes, padded_scores, padded_classes,
            valid_detections)`` stacked over the mapped axis.
        """
        # The thresholds are bound into the mapped function; previously
        # `_nms` was passed bare, so its defaults silently overrode the
        # caller's values.
        # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
        return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), input,
                         fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
                         name='agnostic_nms')

    @staticmethod
    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):  # agnostic NMS
        """Suppress overlapping boxes of one element and pad results to ``topk_all``.

        Boxes are padded with 0.0; scores and class indices with -1.0.
        """
        boxes, classes, scores = x
        class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
        scores_inp = tf.reduce_max(scores, -1)
        selected_inds = tf.image.non_max_suppression(
            boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres)
        selected_boxes = tf.gather(boxes, selected_inds)
        # Rows missing to reach the fixed output length.
        pad_rows = topk_all - tf.shape(selected_boxes)[0]
        padded_boxes = tf.pad(selected_boxes,
                              paddings=[[0, pad_rows], [0, 0]],
                              mode="CONSTANT", constant_values=0.0)
        selected_scores = tf.gather(scores_inp, selected_inds)
        padded_scores = tf.pad(selected_scores,
                               paddings=[[0, pad_rows]],
                               mode="CONSTANT", constant_values=-1.0)
        selected_classes = tf.gather(class_inds, selected_inds)
        padded_classes = tf.pad(selected_classes,
                                paddings=[[0, pad_rows]],
                                mode="CONSTANT", constant_values=-1.0)
        valid_detections = tf.shape(selected_inds)[0]
        return padded_boxes, padded_scores, padded_classes, valid_detections

# ---- Parse the YOLOv5 ".yaml" configuration file and build the corresponding Keras layers ----
def parse_model(yaml_dict):  # model_dict, input_channels(3)
    """Instantiate the layers described by a YOLOv5-style model dictionary.

    Args:
        yaml_dict: Mapping with the keys ``anchors``, ``nc``,
            ``depth_multiple``, ``width_multiple``, ``backbone`` and ``head``.
            Every ``backbone``/``head`` entry is ``(from, number, module,
            args)``. The mapping is not modified.

    Returns:
        List of layer objects in definition order. Each carries ``.i`` (its
        index) and ``.f`` (the ``from`` field of its entry).
    """
    anchors, nc = yaml_dict['anchors'], yaml_dict['nc']
    depth_multiple, width_multiple = yaml_dict['depth_multiple'], yaml_dict['width_multiple']
    num_anchors = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors
    output_dims = num_anchors * (nc + 5)
    layers = []
    # from, number, module, args
    for i, (f, number, module, args) in enumerate(yaml_dict['backbone'] + yaml_dict['head']):
        # SECURITY NOTE: eval() runs text taken from the configuration; only
        # load configuration files from trusted sources.
        module = eval(module) if isinstance(module, str) else module
        # Work on a copy so the caller's configuration lists stay untouched.
        args = list(args)
        for j, arg in enumerate(args):
            if not isinstance(arg, str):
                continue
            try:
                # Resolves names such as `nc` / `anchors` from this scope.
                args[j] = eval(arg)
            except Exception:
                # Best effort: strings that are not expressions stay as-is.
                pass
        number = max(round(number * depth_multiple), 1) if number > 1 else number  # control the model scale
        if module in [Conv2D, Conv, Bottleneck, SPP, Focus, BottleneckCSP, C3]:
            c2 = args[0]
            # Scale the width and round up to a multiple of 8, except for the
            # detection output width.
            c2 = math.ceil(c2 * width_multiple / 8) * 8 if c2 != output_dims else c2
            args = [c2, *args[1:]]
            # NOTE(review): SPPCSP is listed here but not in the enclosing
            # check, so this branch is never taken for it -- confirm intent.
            if module in [BottleneckCSP, C3, SPPCSP]:
                args.insert(1, number)
                number = 1
        if number > 1:
            # Sequential expects ONE list of layers; unpacking the list passed
            # the second layer as the `name` argument.
            modules = tf.keras.Sequential([module(*args) for _ in range(number)])
        else:
            modules = module(*args)
        modules.i, modules.f = i, f
        layers.append(modules)
    return layers  # tf.keras.Sequential(layers)

class Model(object):
    """Build the layers from a configuration and assemble the two-output Keras model.

    Args:
        cfg: Model dictionary, or path to a ``.yaml`` file holding one.
        ch: Accepted but not used.
        nc: Number of classes; overrides the configuration value when it
            is truthy and differs.
        model: Optional ``Detect`` instance whose anchors are divided by its
            strides.
        imgsz: Stored on the instance.
    """

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=20, model=None, imgsz=(640, 640)):  # model, channels, classes
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        self.imgsz = imgsz
        # Define model
        if nc and nc != self.yaml['nc']:
            print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
            self.yaml['nc'] = nc  # override yaml value
        self.model = parse_model(self.yaml)
        if isinstance(model, Detect):
            # Convert the anchors to grid units, shape 3 * 3 * 2. This used
            # to read `module.stride`, a name that does not exist here.
            model.anchors /= tf.reshape(model.stride, [-1, 1, 1])

    def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
                conf_thres=0.25):
        """Run ``inputs`` through every parsed layer in order.

        A layer whose ``f`` is not ``-1`` takes its input from earlier
        outputs: a single output when ``f`` is an int, otherwise a list in
        which ``-1`` stands for the current tensor. The NMS-related
        parameters are accepted but not used.

        Returns:
            ``(last_output, y[9], y[6], y[4])`` where ``y`` holds the output
            of every layer.
        """
        y = []  # outputs
        x = inputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                if isinstance(m.f, int):
                    x = y[m.f]
                else:
                    x = [x if j == -1 else y[j] for j in m.f]
            x = m(x)  # run
            y.append(x)

        return x, y[9], y[6], y[4]

    def built_TFmodel(self, img_size, name='yolov5'):
        """Assemble the Keras model with a 2-channel softmax map and the detection output.

        Args:
            img_size: Side length of the square, 3-channel input.
            name: Name given to the returned ``tf.keras.Model``.

        Returns:
            ``tf.keras.Model`` with outputs ``[output_data_image, output]``.
        """
        x = tf.keras.Input([img_size, img_size, 3])

        output, conv, route_2, route_1 = self.predict(x)

        # Stage 1: refine layer-9 features and upsample by 2.
        conv1 = tf.keras.layers.Conv2D(1024, 2, padding='same', trainable=True)(conv)
        conv1 = tf.nn.leaky_relu(conv1)
        conv1 = tf.keras.layers.Conv2DTranspose(512, 2, strides=2, padding='same', trainable=True)(conv1)
        conv1 = tf.nn.leaky_relu(conv1)
        for i in range(4):
            conv1 = tf.keras.layers.Conv2D(512, 2, padding='same', trainable=True)(conv1)
            conv1 = tf.nn.leaky_relu(conv1)
            conv1 = tf.keras.layers.BatchNormalization()(conv1)

        # Stage 2: merge with layer-6 features and upsample by 2.
        conv2 = tf.concat([conv1, route_2], axis=-1)
        conv2 = tf.keras.layers.Conv2D(512, 2, padding='same', trainable=True)(conv2)
        conv2 = tf.nn.leaky_relu(conv2)
        conv2 = tf.keras.layers.Conv2DTranspose(256, 2, strides=2, padding='same', trainable=True)(conv2)
        conv2 = tf.nn.leaky_relu(conv2)
        for i in range(8):
            conv2 = tf.keras.layers.Conv2D(256, 2, padding='same', trainable=True)(conv2)
            conv2 = tf.nn.leaky_relu(conv2)
            conv2 = tf.keras.layers.BatchNormalization()(conv2)

        # Stage 3: merge with layer-4 features and upsample by 2.
        conv3 = tf.concat([conv2, route_1], axis=-1)
        conv3 = tf.keras.layers.Conv2D(256, 2, padding='same', trainable=True)(conv3)
        conv3 = tf.nn.leaky_relu(conv3)
        conv3 = tf.keras.layers.Conv2DTranspose(128, 2, strides=2, padding='same', trainable=True)(conv3)
        conv3 = tf.nn.leaky_relu(conv3)
        for i in range(8):
            conv3 = tf.keras.layers.Conv2D(128, 2, padding='same', trainable=True)(conv3)
            conv3 = tf.nn.leaky_relu(conv3)
            conv3 = tf.keras.layers.BatchNormalization()(conv3)

        # Two further 2x upsampling steps.
        for i in range(2):
            conv3 = tf.keras.layers.Conv2DTranspose(64, 2, strides=2, padding='same', trainable=True)(conv3)
            conv3 = tf.nn.leaky_relu(conv3)
            conv3 = tf.keras.layers.Conv2D(64, 2, padding='same', trainable=True)(conv3)
            conv3 = tf.nn.leaky_relu(conv3)
            conv3 = tf.keras.layers.BatchNormalization()(conv3)

        # Final refinement and the 2-channel softmax map.
        conv4 = tf.keras.layers.Conv2D(32, 2, padding='same', trainable=True)(conv3)
        conv4 = tf.keras.layers.Conv2D(32, 2, padding='same', trainable=True)(conv4)
        conv4 = tf.nn.leaky_relu(conv4)
        output_data_image = tf.keras.layers.Conv2D(2, 1, padding='same', activation="softmax", trainable=True)(conv4)

        return tf.keras.Model(inputs=x, outputs=[output_data_image, output], name=name)

    @staticmethod
    def _xywh2xyxy(xywh):
        """Convert boxes from ``[x, y, w, h]`` to ``[x1, y1, x2, y2]`` on the last axis.

        ``(x1, y1)`` is the top-left and ``(x2, y2)`` the bottom-right corner.
        Declared static because it takes no ``self``; without the decorator
        it could not be called on an instance.
        """
        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
        return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)

In [None]:
# Build the network
def run(imgsz=(640, 640),  # inference size h,w
         batch_size=1,  # batch size
         dynamic=False):
    """Build the Keras model and return it with stride-normalised anchors.

    Args:
        imgsz: Inference size ``(h, w)``; ``imgsz[0]`` is used as the side
            length of the model input.
        batch_size: Accepted but not used.
        dynamic: Accepted but not used.

    Returns:
        ``(TFmodel, anchors)``: the assembled ``tf.keras.Model`` and the
        anchors of the last parsed layer divided by the stride of their scale.
    """
    # Raw string: same value as before, without invalid escape sequences.
    tf_model = Model(cfg=r'F:\HCJ_for_AI_training\Yolov5-TF-main\Yolov5-TF-main\yolov5.yaml', imgsz=imgsz, nc=4)
    anchors = tf_model.model[-1].anchors
    # The anchors are laid out as (scale, anchor, 2), so the strides must
    # broadcast along axis 0. The former `[:, np.newaxis]` shape (3, 1) lined
    # the strides up with the per-anchor axis instead.
    strides = np.array([8, 16, 32], dtype=np.float32)[:, np.newaxis, np.newaxis]
    anchors = anchors / strides
    TFmodel = tf_model.built_TFmodel(imgsz[0])
    return TFmodel, anchors

def decode(conv_output1,conv_output2,conv_output3,ANCHORS,num_classes=4):
    """Decode the three raw head outputs into box, confidence and class values.

    Each output is reshaped to ``[batch, size, size, 3, 5 + num_classes]``
    where ``size`` is taken from axis 1 of that output (the docstring of the
    original names 80, 40 and 20). The strides 8, 16 and 32 are applied to
    the first, second and third output respectively.

    Args:
        conv_output1: Raw output of the first scale.
        conv_output2: Raw output of the second scale.
        conv_output3: Raw output of the third scale.
        ANCHORS: Indexable with 0, 1, 2 to obtain the anchors of each scale.
        num_classes: Number of class channels.

    Returns:
        Tuple of three tensors holding ``(x, y, w, h, score, probability)``
        on the last axis.
    """
    # The batch size of the first output is used for all three scales.
    batch_size = tf.shape(conv_output1)[0]

    def decode_scale(conv_output, anchors, stride):
        output_size = tf.shape(conv_output)[1]
        conv_output = tf.reshape(
            conv_output, (batch_size, output_size, output_size, 3, 5 + num_classes))

        # Channel layout: [x, y, w, h, confidence, class...]
        raw_dxdy = conv_output[..., 0:2]  # centre offsets
        raw_dwdh = conv_output[..., 2:4]  # width/height terms
        raw_conf = conv_output[..., 4:5]  # confidence
        raw_prob = conv_output[..., 5:]   # class scores

        # Grid of cell indices: row index varies along axis 0, column index
        # along axis 1; stored as (column, row) pairs.
        cell_range = tf.range(output_size, dtype=tf.int32)
        row_idx = tf.tile(cell_range[:, tf.newaxis], [1, output_size])
        col_idx = tf.tile(cell_range[tf.newaxis, :], [output_size, 1])
        xy_grid = tf.concat([col_idx[:, :, tf.newaxis], row_idx[:, :, tf.newaxis]], axis=-1)
        xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
        xy_grid = tf.cast(xy_grid, tf.float32)

        # Scale centre and size by the stride.
        pred_xy = (tf.sigmoid(raw_dxdy) + xy_grid) * stride
        pred_wh = ((tf.sigmoid(raw_dwdh) * 2) * anchors) * stride
        pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
        pred_conf = tf.sigmoid(raw_conf)
        pred_prob = tf.sigmoid(raw_prob)
        return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)

    decode_output1 = decode_scale(conv_output1, ANCHORS[0], 8)
    decode_output2 = decode_scale(conv_output2, ANCHORS[1], 16)
    decode_output3 = decode_scale(conv_output3, ANCHORS[2], 32)

    return decode_output1, decode_output2, decode_output3

In [None]:
# ---- Loss functions ----
def bbox_iou(boxes1, boxes2):
    """Return the IoU of boxes given as ``(x, y, w, h)`` on the last axis.

    A constant ``1e-4`` is added to the union to avoid division by zero.
    """
    area1 = boxes1[..., 2] * boxes1[..., 3]
    area2 = boxes2[..., 2] * boxes2[..., 3]

    def to_corners(box):
        # centre/size -> (min corner, max corner)
        half_extent = box[..., 2:] * 0.5
        return tf.concat([box[..., :2] - half_extent,
                          box[..., :2] + half_extent], axis=-1)

    corners1 = to_corners(boxes1)
    corners2 = to_corners(boxes2)

    overlap_min = tf.maximum(corners1[..., :2], corners2[..., :2])
    overlap_max = tf.minimum(corners1[..., 2:], corners2[..., 2:])

    overlap = tf.maximum(overlap_max - overlap_min, 0.0)
    inter_area = overlap[..., 0] * overlap[..., 1]
    union_area = area1 + area2 - inter_area

    return inter_area / (union_area + 1e-4)  # [0, 1]

def bbox_giou(boxes1, boxes2):
    """Return the generalised IoU of boxes given as ``(x, y, w, h)`` on the last axis.

    The result is the IoU minus the enclosing-box term, with ``1e-4``
    smoothing constants in the union denominator and in both parts of the
    enclosing-box fraction.
    """
    def to_ordered_corners(box):
        # centre/size -> corners, then order so the min corner comes first
        half_extent = box[..., 2:] * 0.5
        corners = tf.concat([box[..., :2] - half_extent,
                             box[..., :2] + half_extent], axis=-1)
        return tf.concat([tf.minimum(corners[..., :2], corners[..., 2:]),
                          tf.maximum(corners[..., :2], corners[..., 2:])], axis=-1)

    def corner_area(corners):
        return (corners[..., 2] - corners[..., 0]) * (corners[..., 3] - corners[..., 1])

    corners1 = to_ordered_corners(boxes1)
    corners2 = to_ordered_corners(boxes2)
    area1 = corner_area(corners1)
    area2 = corner_area(corners2)

    overlap_min = tf.maximum(corners1[..., :2], corners2[..., :2])
    overlap_max = tf.minimum(corners1[..., 2:], corners2[..., 2:])
    overlap = tf.maximum(overlap_max - overlap_min, 0.0)
    inter_area = overlap[..., 0] * overlap[..., 1]
    union_area = area1 + area2 - inter_area
    iou = inter_area / (union_area + 1e-4)

    # Smallest box enclosing both inputs.
    hull_min = tf.minimum(corners1[..., :2], corners2[..., :2])
    hull_max = tf.maximum(corners1[..., 2:], corners2[..., 2:])
    hull = tf.maximum(hull_max - hull_min, 0.0)
    enclose_area = hull[..., 0] * hull[..., 1]

    penalty = (enclose_area - union_area + 1e-4) / (enclose_area + 1e-4)
    return iou - penalty  # range (-1, 1]


def compute_loss(output_data_image, outputbox1,outputbox2,outputbox3, bboxes1, bboxes2, bboxes3, image_label,
                 batch_lbboxes, batch_mbboxes,batch_sbboxes,conv_raw1, conv_raw2, conv_raw3):
    """Compute the image, class, box and confidence losses.

    Shapes, as stated by the original documentation (P is the input size):
        output_data_image: [batch, 640, 640, 2]
        outputbox1 / bboxes1: [batch, P/8,  P/8,  3, 4 + conf + num_classes]
        outputbox2 / bboxes2: [batch, P/16, P/16, 3, 4 + conf + num_classes]
        outputbox3 / bboxes3: [batch, P/32, P/32, 3, 4 + conf + num_classes]
        image_label: [batch, 640, 640, 2]

    ``batch_lbboxes``, ``batch_mbboxes`` and ``batch_sbboxes`` hold the
    ground-truth boxes compared against scales 1, 2 and 3 respectively, and
    ``conv_raw1..3`` the raw (pre-sigmoid) head outputs of those scales.

    Each raw output is reshaped to the shape of its decoded counterpart
    instead of hard-coded 80/40/20 grids with 9 channels, so other grid
    sizes, class counts and a dynamic batch dimension also work.

    Returns:
        ``(Image_loss, Prob_loss, Iou_loss, Conf_loss)``.
    """
    IOU_LOSS_THRESH = 0.3
    # NOTE(review): the box-size weighting assumes a 640-pixel input -- confirm
    # before training at another resolution.
    input_size = tf.cast(640, tf.float32)

    def scale_losses(pred, target, conv_raw, gt_boxes):
        """Return (class, box, confidence) loss scalars for one scale."""
        conv_raw = tf.reshape(conv_raw, tf.shape(pred))
        obj_mask = target[:, :, :, :, 4:5]

        # Loss 1: class loss.
        prob = tf.keras.losses.categorical_crossentropy(
            target[:, :, :, :, 5:], pred[:, :, :, :, 5:] + 1e-10)
        prob = tf.reduce_mean(tf.reduce_sum(prob, axis=[1, 2, 3]))

        # Loss 2: GIoU loss, weighted by 2 - (box area / image area).
        real_boxes = target[:, :, :, :, 0:4]
        pred_boxes = pred[:, :, :, :, 0:4]
        giou = tf.expand_dims(bbox_giou(pred_boxes, real_boxes), axis=-1)
        bbox_loss_scale = 2.0 - 1.0 * real_boxes[:, :, :, :, 2:3] * real_boxes[:, :, :, :, 3:4] / (
            input_size * input_size)
        giou_loss = obj_mask * bbox_loss_scale * (1 - giou)
        iou = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))

        # Loss 4: confidence loss. Cells without an object only count as
        # background when their best IoU with any ground-truth box is below
        # the threshold.
        pair_iou = bbox_iou(pred_boxes[:, :, :, :, np.newaxis, :],
                            gt_boxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.expand_dims(tf.reduce_max(pair_iou, axis=-1), axis=-1)
        respond_bgd = (1.0 - obj_mask) * tf.cast(max_iou < IOU_LOSS_THRESH, tf.float32)

        conf_focal = tf.pow(obj_mask - pred[:, :, :, :, 4:5], 2)
        # The raw value is used because the op expects logits.
        bce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=obj_mask, logits=conv_raw[:, :, :, :, 4:5] + 1e-10)
        conf = conf_focal * (obj_mask * bce + respond_bgd * bce)
        conf = tf.reduce_mean(tf.reduce_sum(conf, axis=[1, 2, 3]))
        return prob, iou, conf

    prob1, iou1, conf1 = scale_losses(outputbox1, bboxes1, conv_raw1, batch_lbboxes)
    prob2, iou2, conf2 = scale_losses(outputbox2, bboxes2, conv_raw2, batch_mbboxes)
    prob3, iou3, conf3 = scale_losses(outputbox3, bboxes3, conv_raw3, batch_sbboxes)

    Prob_loss = prob1 + prob2 + prob3
    Iou_loss = iou1 + iou2 + iou3
    Conf_loss = conf1 + conf2 + conf3

    # Loss 3: image loss.
    image_loss = tf.keras.losses.binary_crossentropy(image_label, output_data_image)
    Image_loss = tf.reduce_mean(tf.reduce_sum(image_loss, axis=[1, 2]))

    return Image_loss, Prob_loss, Iou_loss, Conf_loss

In [None]:
# Dataset location: root directory of the input images.
image_dir = r'F:\HCJ_for_AI_training\YOLO-crack\Image'

# Class names of the labels.
LABEL_BOX = ['zong', 'wang', 'xie', 'heng']

# Numeric id of each class, in the same order as LABEL_BOX.
LABEL_number = list(range(len(LABEL_BOX)))

def get_image_and_labels_paths(image_dir: str,
                               label_dir: str = r'F:\HCJ_for_AI_training\YOLO-crack\Labels'):
    '''
    Collect, for every image, the paths of the image, its label file and its mask.

    Images are looked up one directory level below ``image_dir`` (pattern ``*/*``).
    For each image the six characters in front of the 4-character extension are
    taken as the sample id; the label text file and the mask PNG are expected
    in ``label_dir`` under that id.

    Args:
        image_dir: Root folder of the images.
        label_dir: Folder holding ``<id>.txt`` and ``<id>.png``. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        A list of ``[image_path, label_txt_path, mask_png_path]`` string triples
        in random order (shuffled with the global ``random`` state). Empty when
        no image is found.
    '''
    all_paths = []
    for image_path in (str(p) for p in pathlib.Path(image_dir).glob('*/*')):
        # NOTE(review): assumes a 6-character file stem and a 3-character
        # extension (e.g. "000123.jpg") - confirm against the dataset.
        sample_id = image_path[-10:-4]
        # The Windows separator is kept on purpose so the produced strings stay
        # identical to what the rest of the pipeline already receives.
        all_paths.append([image_path,
                          label_dir + '\\' + sample_id + '.txt',
                          label_dir + '\\' + sample_id + '.png'])
    random.shuffle(all_paths)
    return all_paths

# Shuffled list of [image path, label .txt path, mask .png path] triples.
paths = get_image_and_labels_paths(image_dir)
BATCH_SIZE = 8

# Dataset of those path triples, grouped into batches of BATCH_SIZE
# (the files themselves are loaded later, in the training loop).
My_dataset = tf.data.Dataset.from_tensor_slices(paths).batch(BATCH_SIZE)

def load_image(image_path):
    """Read the image stored at ``image_path`` with OpenCV.

    Args:
        image_path: Scalar string tensor (as yielded by the path dataset),
            ``bytes`` or ``str``.

    Returns:
        Whatever ``cv2.imread`` returns for the path.
    """
    # Decode the tensor's value instead of slicing its printed representation:
    # the repr escapes non-ASCII bytes, which corrupts such paths, and the
    # fixed slice offsets only fit a scalar string tensor.
    if hasattr(image_path, "numpy"):
        image_path = image_path.numpy()
    if isinstance(image_path, bytes):
        image_path = image_path.decode("utf-8")
    # Same normalisation as before: backslashes become forward slashes.
    image_path = str(image_path).replace("\\", "/")
    return cv2.imread(image_path)

def load_label(label_path, class_offset=15):
    """Parse a whitespace-separated label file into a float array.

    Each non-blank line becomes one row; the first column of every row is
    shifted down by ``class_offset``.

    Args:
        label_path: Scalar string tensor (as yielded by the path dataset),
            ``bytes`` or ``str``.
        class_offset: Value subtracted from column 0. Defaults to 15, the
            constant previously hard-coded here.
            NOTE(review): presumably the class ids in the files start at 15 -
            confirm against the annotation tool.

    Returns:
        ``np.ndarray`` of dtype float with one row per non-blank line. An empty
        file yields an empty 1-D array, as before.
    """
    # Decode the tensor's value instead of slicing its printed representation
    # (the repr escapes non-ASCII bytes and breaks such paths).
    if hasattr(label_path, "numpy"):
        label_path = label_path.numpy()
    if isinstance(label_path, bytes):
        label_path = label_path.decode("utf-8")
    label_path = str(label_path).replace("\\", "/")

    # ``with`` closes the handle; blank lines are skipped because they would
    # make the rows ragged and np.array() fail.
    with open(label_path) as label_file:
        rows = [line.split() for line in label_file if line.strip()]

    result = np.array(rows, dtype=float)
    if result.ndim == 2:
        result[:, 0] -= class_offset
    return result

def load_image_label(image_label_path):
    """Read the mask image at ``image_label_path`` as a single-channel array.

    Args:
        image_label_path: Scalar string tensor (as yielded by the path
            dataset), ``bytes`` or ``str``.

    Returns:
        The image read with ``cv2.imread(path, 0)`` with a trailing axis
        appended, i.e. indexed as ``[row, col, 0]``.
    """
    # Decode the tensor's value instead of slicing its printed representation
    # (the repr escapes non-ASCII bytes and breaks such paths).
    if hasattr(image_label_path, "numpy"):
        image_label_path = image_label_path.numpy()
    if isinstance(image_label_path, bytes):
        image_label_path = image_label_path.decode("utf-8")
    image_path = str(image_label_path).replace("\\", "/")

    # Flag 0 asks OpenCV for a single-channel (grayscale) read.
    image = cv2.imread(image_path, 0)
    return image[..., np.newaxis]

def image_preporcess(image, target_size,  image_label, gt_boxes=None,):
    """Letterbox an image and its mask to ``target_size`` and rescale the boxes.

    The image and the mask are resized with a common scale that preserves the
    aspect ratio, centred on a zero-filled canvas and divided by 255.

    Args:
        image: Array indexed ``[row, col, channel]``; written into a 3-channel
            canvas.
        target_size: ``(height, width)`` of the output canvas.
        image_label: Mask belonging to ``image``; written into a 1-channel
            canvas.
        gt_boxes: Optional array with one row per box laid out as
            ``(class, x, y, w, h)``, where x, y, w, h are fractions of the
            original image size.

    Returns:
        ``(image_paded, image_label_paded)`` when ``gt_boxes`` is None,
        otherwise ``(image_paded, boxes, image_label_paded)`` where ``boxes``
        is a copy of ``gt_boxes`` with x, y, w, h expressed in canvas pixels.
    """
    ih, iw = target_size
    h, w, _ = image.shape

    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    # Offsets of the resized content inside the padded canvas.
    dw, dh = (iw - nw) // 2, (ih - nh) // 2

    image_resized = cv2.resize(image, (nw, nh))
    image_label_resized = cv2.resize(image_label, (nw, nh))[:, :, np.newaxis]

    image_paded = np.full(shape=[ih, iw, 3], fill_value=0)
    image_paded[dh:nh + dh, dw:nw + dw, :] = image_resized
    image_paded = image_paded / 255.

    image_label_paded = np.full(shape=[ih, iw, 1], fill_value=0)
    image_label_paded[dh:nh + dh, dw:nw + dw, :] = image_label_resized
    image_label_paded = image_label_paded / 255.

    if gt_boxes is None:
        return image_paded, image_label_paded

    boxes = copy.deepcopy(gt_boxes)
    if np.size(boxes) == 0:
        # Image without annotations: nothing to rescale.
        return image_paded, boxes, image_label_paded

    # (px, py, pw, ph) fractions -> (x, y, w, h) pixels on the canvas.
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * w * scale
    boxes[:, [2, 4]] = boxes[:, [2, 4]] * h * scale
    # The padding offset shifts the centre only; width and height are sizes
    # and must not be translated.
    boxes[:, 1] += dw
    boxes[:, 2] += dh

    return image_paded, boxes, image_label_paded

def preprocess_true_boxes(bboxes, anchors):
    """Build the per-scale training targets for the boxes of one image.

    Every box is matched against the anchors of the three output scales
    (strides 8, 16, 32). An anchor whose IoU with the box exceeds 0.3 becomes
    responsible for it; if no anchor on any scale qualifies, the single best
    anchor is used instead.

    Args:
        bboxes: Iterable of rows ``(class, x, y, w, h)`` with x, y, w, h in
            pixels of the 640x640 input.
        anchors: Indexed by scale; ``anchors[i]`` is written into the
            width/height columns of the 3 anchors of scale ``i``.
            NOTE(review): presumably expressed in stride units - confirm.

    Returns:
        ``(label_lbbox, label_mbbox, label_sbbox, lbboxes, mbboxes, sbboxes)``
        in stride order 8, 16, 32. Each label has shape
        ``(size, size, 3, 5 + 4)`` holding ``xywh, objectness, smoothed
        one-hot class``; each box list has shape ``(10, 4)`` and is filled as
        a ring buffer, so more than 10 boxes on a scale overwrite the oldest.
    """
    with tf.device('/cpu:0'):
        num_classes = 4
        anchor_per_scale = 3
        max_bbox_per_scale = 10
        # Single-entry list: the input size is always 640 here.
        train_input_size = random.choice([640])
        strides = np.array([8, 16, 32])
        train_output_sizes = train_input_size // strides

        label = [np.zeros((size, size, anchor_per_scale, 5 + num_classes))
                 for size in train_output_sizes]
        bboxes_xywh = [np.zeros((max_bbox_per_scale, 4)) for _ in range(3)]
        bbox_count = np.zeros((3,))

        # Label-smoothing ingredients are identical for every box.
        uniform_distribution = np.full(num_classes, 1.0 / num_classes)
        deta = 0.01

        for bbox in bboxes:
            bbox_xywh = bbox[1:]
            bbox_class_ind = int(bbox[0])

            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            onehot = np.zeros(num_classes, dtype=np.float64)
            onehot[bbox_class_ind] = 1.0
            smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution

            # Row i holds the box expressed in grid cells of scale i.
            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / strides[:, np.newaxis]

            iou = []
            exist_positive = False
            for i in range(3):
                # Anchors are centred on the grid cell that contains the box centre.
                anchors_xywh = np.zeros((anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                anchors_xywh[:, 2:4] = anchors[i]

                iou_scale = bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)

                    label[i][yind, xind, iou_mask, :] = 0
                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
                    label[i][yind, xind, iou_mask, 4:5] = 1.0
                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot

                    bbox_ind = int(bbox_count[i] % max_bbox_per_scale)
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                    bbox_count[i] += 1

                    exist_positive = True

            if not exist_positive:
                # Fall back to the best anchor over all scales; the flat index
                # decomposes into (scale, anchor within scale).
                best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
                best_detect = int(best_anchor_ind / anchor_per_scale)
                best_anchor = int(best_anchor_ind % anchor_per_scale)
                xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

                label[best_detect][yind, xind, best_anchor, :] = 0
                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot

                bbox_ind = int(bbox_count[best_detect] % max_bbox_per_scale)
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                bbox_count[best_detect] += 1

        label_lbbox, label_mbbox, label_sbbox = label
        lbboxes, mbboxes, sbboxes = bboxes_xywh
        return label_lbbox, label_mbbox, label_sbbox, lbboxes, mbboxes, sbboxes
    
def get_train_data(image_batch, gt_boxes_batch,image_label_batch, anchors):
    """Assemble the network inputs and targets for one batch.

    Each sample is letterboxed with ``image_preporcess`` and its boxes are
    turned into per-scale targets with ``preprocess_true_boxes``.

    Args:
        image_batch: Sequence of images, one per sample.
        gt_boxes_batch: Sequence of box arrays, aligned with ``image_batch``.
        image_label_batch: Sequence of masks, aligned with ``image_batch``.
        anchors: Forwarded unchanged to ``preprocess_true_boxes``.

    Returns:
        ``(batch_image, Image_label, batch_label_1bbox, batch_label_2bbox,
        batch_label_3bbox, batch_lbboxes, batch_mbboxes, batch_sbboxes)``, all
        float32 arrays whose first axis is the batch. ``Image_label`` is the
        2-class one-hot encoding of the mask; the three labels belong to the
        output grids of strides 8, 16 and 32.
    """
    with tf.device('/cpu:0'):
        num_classes = 4
        anchor_per_scale = 3
        max_bbox_per_scale = 10
        batch_size = len(image_batch)

        # Single-entry list: the input size is always 640 here.
        train_input_size = random.choice([640])
        train_output_sizes = train_input_size // np.array([8, 16, 32])

        Image_label = np.zeros((batch_size, train_input_size, train_input_size, 2), dtype=np.float32)
        batch_image = np.zeros((batch_size, train_input_size, train_input_size, 3), dtype=np.float32)

        batch_label_1bbox, batch_label_2bbox, batch_label_3bbox = (
            np.zeros((batch_size, size, size, anchor_per_scale, 5 + num_classes), dtype=np.float32)
            for size in train_output_sizes
        )
        batch_lbboxes, batch_mbboxes, batch_sbboxes = (
            np.zeros((batch_size, max_bbox_per_scale, 4), dtype=np.float32)
            for _ in range(3)
        )

        for i in range(batch_size):
            image, bboxes, image_label = image_preporcess(
                image_batch[i], (train_input_size, train_input_size),
                image_label_batch[i], gt_boxes=gt_boxes_batch[i])

            # image_preporcess returns the mask as floats in [0, 1], but
            # tf.one_hot only accepts integer indices: round to the nearest
            # class id before encoding.
            mask_ids = np.rint(image_label[:, :, 0]).astype(np.int32)
            Image_label[i, :, :, :] = tf.one_hot(mask_ids, depth=2)

            label_lbbox, label_mbbox, label_sbbox, lbboxes, mbboxes, sbboxes = preprocess_true_boxes(bboxes, anchors)

            batch_image[i, :, :, :] = image
            batch_label_1bbox[i, :, :, :, :] = label_lbbox
            batch_label_2bbox[i, :, :, :, :] = label_mbbox
            batch_label_3bbox[i, :, :, :, :] = label_sbbox
            batch_lbboxes[i, :, :] = lbboxes
            batch_mbboxes[i, :, :] = mbboxes
            batch_sbboxes[i, :, :] = sbboxes

        return batch_image, Image_label, batch_label_1bbox, batch_label_2bbox,batch_label_3bbox,  batch_lbboxes,batch_mbboxes,batch_sbboxes

In [None]:
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Map raw predictions back to the original image and filter them.

    Args:
        pred_bbox: Rows of ``(x, y, w, h, confidence, class probabilities...)``
            in coordinates of the letterboxed ``input_size`` square.
        org_img_shape: ``(height, width)`` of the original image.
        input_size: Side length of the square network input.
        score_threshold: Rows whose confidence is not above this are dropped.

    Returns:
        Array with one row ``(xmin, ymin, xmax, ymax, confidence, class)`` per
        kept box, in original-image coordinates.
    """
    min_scale, max_scale = 0, np.inf
    preds = np.array(pred_bbox)

    centers, sizes = preds[:, 0:2], preds[:, 2:4]
    confidences = preds[:, 4]
    class_probs = preds[:, 5:]

    # (x, y, w, h) -> (xmin, ymin, xmax, ymax)
    half_sizes = sizes * 0.5
    corners = np.concatenate([centers - half_sizes, centers + half_sizes], axis=-1)

    # Undo the letterbox: remove the padding, then the resize factor.
    org_h, org_w = org_img_shape
    ratio = min(input_size / org_w, input_size / org_h)
    pad_x = (input_size - ratio * org_w) / 2
    pad_y = (input_size - ratio * org_h) / 2
    corners[:, 0::2] = (corners[:, 0::2] - pad_x) / ratio
    corners[:, 1::2] = (corners[:, 1::2] - pad_y) / ratio

    # Clip to the image; boxes that end up inverted are zeroed out.
    top_left = np.maximum(corners[:, :2], [0, 0])
    bottom_right = np.minimum(corners[:, 2:], [org_w - 1, org_h - 1])
    corners = np.concatenate([top_left, bottom_right], axis=-1)
    inverted = (corners[:, 0] > corners[:, 2]) | (corners[:, 1] > corners[:, 3])
    corners[inverted] = 0

    # Keep boxes with a positive, finite scale (sqrt of area) ...
    extent = corners[:, 2:4] - corners[:, 0:2]
    box_scale = np.sqrt(extent[:, 0] * extent[:, 1])
    scale_ok = (min_scale < box_scale) & (box_scale < max_scale)

    # ... and a confidence above the threshold.
    keep = scale_ok & (confidences > score_threshold)
    labels = np.argmax(class_probs, axis=-1)

    return np.concatenate(
        [corners[keep], confidences[keep][:, np.newaxis], labels[keep][:, np.newaxis]],
        axis=-1,
    )

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    Per-class non-maximum suppression.

    :param bboxes: array of rows (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: overlap above which a box is suppressed ('nms' only)
    :param sigma: decay parameter of the Gaussian re-weighting ('soft-nms' only)
    :param method: 'nms' or 'soft-nms'
    :return: list of the selected rows
    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    selected = []

    for cls in list(set(bboxes[:, 5])):
        candidates = bboxes[bboxes[:, 5] == cls]

        while len(candidates) > 0:
            # Take the highest-scoring remaining box of this class.
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            selected.append(winner)
            candidates = np.delete(candidates, top, axis=0)

            overlap = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

            assert method in ['nms', 'soft-nms']

            if method == 'nms':
                # Hard suppression: overlapping boxes get weight 0.
                weight = np.where(overlap > iou_threshold, 0.0, 1.0).astype(np.float32)
            else:
                # Soft suppression: scores decay with the overlap.
                weight = np.exp(-(1.0 * overlap ** 2 / sigma))

            candidates[:, 4] = candidates[:, 4] * weight
            candidates = candidates[candidates[:, 4] > 0.]

    return selected

def bboxes_iou(boxes1, boxes2):
    """Intersection-over-union of boxes given as (xmin, ymin, xmax, ymax).

    The two inputs are broadcast against each other on their leading axes.
    The result is floored at the float32 machine epsilon, so it is never 0.
    """
    first = np.array(boxes1)
    second = np.array(boxes2)

    area_first = (first[..., 2] - first[..., 0]) * (first[..., 3] - first[..., 1])
    area_second = (second[..., 2] - second[..., 0]) * (second[..., 3] - second[..., 1])

    # Overlap rectangle; a negative extent means the boxes are disjoint.
    overlap_min = np.maximum(first[..., :2], second[..., :2])
    overlap_max = np.minimum(first[..., 2:], second[..., 2:])
    overlap_wh = np.maximum(overlap_max - overlap_min, 0.0)

    inter_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    union_area = area_first + area_second - inter_area

    return np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)

In [None]:
######## Training #########
# NOTE(review): trainset, logdir and NUM_CLASS are assigned here but are only
# referenced inside the disabled string below (trainset) or not at all within
# this cell as shown.
trainset = My_dataset
logdir = "./data/log"
NUM_CLASS  = 4
# The string literal below is disabled code, not a docstring: none of the names
# in it (steps_per_epoch, global_steps, warmup_steps, total_steps, TRAIN_LR_END)
# are defined by this cell.
"""
steps_per_epoch = len(trainset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = 2 * steps_per_epoch
total_steps = 30 * steps_per_epoch
lr = TRAIN_LR_INIT = 1e-3
TRAIN_LR_END = 1e-6
"""
lr = TRAIN_LR_INIT = 1e-4
# run() is defined elsewhere in the file; its two return values are used here
# as the model and the anchors.
YOLOv5,anchors=run()
#print(anchors)
#YOLOv5.summary()
# Disabled checkpoint restore (the file name contains the Chinese word for "crack").
#YOLOv5.load_weights("./yolov5-裂缝")
optimizer = tf.keras.optimizers.Adam(TRAIN_LR_INIT)

#count=0
# One row is appended to History per training step (see the end of the loop body).
History=[]
for epoch in range(0,400,1):
    # count is the step index within the current epoch.
    count=0
    # My_dataset was batched before this point, so shuffle() reorders whole
    # batches; the reassignment also wraps the already-shuffled dataset again
    # on every epoch.
    My_dataset = My_dataset.shuffle(1000)
    # Despite its name, batch_size holds one batch of
    # [image path, label path, mask path] rows.
    for batch_size in My_dataset:
        count+=1
        train_image = []
        train_label = [] 
        train_image_S = []
        # Load the image, the box labels and the mask of every sample in the batch.
        for i in range(len(batch_size)):
            train_image.append(load_image(batch_size[i][0]))
            train_label.append(load_label(batch_size[i][1]))
            #print(batch_size[i][1])
            train_image_S.append(load_image_label(batch_size[i][2]))
            
            #print(np.array(train_image_S[i].shape))
            #print(np.array(train_image[i].shape))
            #print(batch_size[i][2])

        # Letterbox the samples and build the per-scale detection targets.
        image_data, image_label ,batch_boxes_1,batch_boxes_2,batch_boxes_3,batch_lbboxes,batch_mbboxes,batch_sbboxes = get_train_data(train_image, train_label, train_image_S,anchors)

        image_data=np.array(image_data)

        with tf.GradientTape() as tape:

            # The model returns an image-shaped output plus a sequence of three
            # raw detection outputs.
            output_image, output = YOLOv5(image_data)
            
            decode_output1,decode_output2,decode_output3 = decode(output[0], output[1], output[2],anchors)
            #decode_output1,decode_output2,decode_output3 = output[0],output[1],output[2]
            
            # These zeros are overwritten by compute_loss() right below;
            # Giou_loss is never read afterwards.
            Image_loss=Prob_loss=Giou_loss=Conf_loss=0

            # optimizing process

            Image_loss, Prob_loss, Iou_loss, Conf_loss = compute_loss(output_image, decode_output1,decode_output2,decode_output3,
                                                          batch_boxes_1,batch_boxes_2,batch_boxes_3,
                                                          image_label,
                                                          batch_lbboxes,batch_mbboxes,batch_sbboxes,
                                                          output[0],output[1],output[2])
            
            total_loss =   Image_loss + Prob_loss +  Iou_loss +  Conf_loss
            # NOTE(review): the gradient computation and the optimizer step run
            # inside the tape context; they do not need to be recorded.
            gradients = tape.gradient(total_loss, YOLOv5.trainable_variables)
            optimizer.apply_gradients(zip(gradients,YOLOv5.trainable_variables))
            # Image_loss is part of total_loss but is not printed separately.
            tf.print("=>Epoch%4d STEP %4d Prob_loss:%4.2f Iou_loss: %4.2f Conf_loss:%4.2f total_loss:%4.2f" 
                     %(epoch, count,Prob_loss, Iou_loss, Conf_loss,total_loss))     
            
            # Learning-rate update: linear warm-up, then cosine decay from
            # TRAIN_LR_INIT down to TRAIN_LR_END.
            # NOTE(review): global_steps, warmup_steps, total_steps and
            # TRAIN_LR_END are only assigned inside the disabled string at the
            # top of this cell; unless another cell defines them, the next line
            # raises NameError.
            
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps *TRAIN_LR_INIT
            else:
                lr = TRAIN_LR_END + 0.5 * (TRAIN_LR_INIT - TRAIN_LR_END) * (
                    (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi))
                )
            optimizer.lr.assign(lr.numpy())
            
            # NOTE(review): mAP is not assigned anywhere in this cell - confirm
            # that it is defined elsewhere before this line runs.
            History.append([epoch, optimizer.lr.numpy(),Prob_loss, Iou_loss, Conf_loss,total_loss,mAP])
    
        # Placed inside the batch loop, so the weights are written after every step.
        YOLOv5.save_weights("./YOLO-Crack")