-
Notifications
You must be signed in to change notification settings - Fork 0
/
Yolo_Detector.py
101 lines (83 loc) · 4.02 KB
/
Yolo_Detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import tensorflow as tf
# Anchor boxes read by Main_pipeline, which slices entries [0:3], [3:6] and
# [6:9] (one group of three per detector).
# NOTE(review): empty in this file -- presumably filled in before the graph is
# built; confirm where the nine (width, height) pairs come from.
_ANCHORS = []
# NOTE(review): not referenced by any function visible in this file;
# Main_pipeline receives the model size through its `model_size` argument.
_MODEL_SIZE = ()
# 2-D convolution
def Conv2d(inputs, filters, kernel_size, strides=1):
    """Apply a 2-D convolution through ``tf.layers.conv2d``.

    Args:
        inputs: Tensor handed to ``tf.layers.conv2d``.
        filters: Number of output filters.
        kernel_size: Convolution kernel size.
        strides: Convolution stride; defaults to 1.

    Returns:
        The output tensor of ``tf.layers.conv2d``.
    """
    # Unit stride keeps the spatial size ('SAME'); any other stride is
    # applied without implicit padding ('VALID').
    if strides == 1:
        padding_mode = 'SAME'
    else:
        padding_mode = 'VALID'
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding_mode,
    )
# Batch normalization
def BatchNorm(inputs):
    """Batch-normalize ``inputs`` along the channel axis.

    Args:
        inputs: Tensor to normalize. The convolutions in this file use
            ``tf.layers.conv2d`` with its default ``channels_last`` layout,
            so the channel dimension is the last one.

    Returns:
        The output tensor of ``tf.layers.batch_normalization``.
    """
    # axis must name the *channel* dimension. For channels_last tensors that
    # is -1; axis=1 would normalize per image row instead of per feature map.
    return tf.layers.batch_normalization(inputs=inputs, axis=-1)
# Convolution operations layer block
def Convolution_block(inputs, filters):
    """Run the six-layer conv / batch-norm / leaky-ReLU stack of one scale.

    The first five layers alternate 1x1 convolutions with ``filters``
    outputs and 3x3 convolutions with ``2 * filters`` outputs. The result
    after the fifth layer is kept as the first return value; a sixth
    3x3 layer with ``2 * filters`` outputs produces the second.

    Args:
        inputs: Input feature map.
        filters: Base filter count of the block.

    Returns:
        Tuple ``(route, prediction)``: the tensor after the fifth layer and
        the tensor after the sixth layer.
    """
    def _conv_bn_leaky(tensor, n_filters, kernel):
        # One convolution followed by batch norm and leaky ReLU.
        tensor = Conv2d(tensor, filters=n_filters, kernel_size=kernel)
        tensor = BatchNorm(tensor)
        return tf.nn.leaky_relu(tensor)

    layer_plan = (
        (filters, 1),
        (2 * filters, 3),
        (filters, 1),
        (2 * filters, 3),
        (filters, 1),
    )
    route = inputs
    for n_filters, kernel in layer_plan:
        route = _conv_bn_leaky(route, n_filters, kernel)

    prediction = _conv_bn_leaky(route, 2 * filters, 3)
    return route, prediction
# Final detection layer of three detectors
def Detect(inputs, n_classes, anchors, img_size):
    """Turn a feature map into decoded box predictions for one scale.

    A 1x1 convolution produces ``len(anchors) * (5 + n_classes)`` channels
    per grid cell. Those are reshaped to one row per (cell, anchor) and
    decoded: centers get a sigmoid plus the cell offset and are multiplied by
    the grid stride, sizes are ``exp(.)`` times the anchor, and confidence and
    class scores get a sigmoid.

    Args:
        inputs: Feature map for this scale. Its height and width must be
            statically known because they are read with ``get_shape()``.
        n_classes: Number of object classes.
        anchors: Sequence of 2-element anchor sizes used at this scale.
        img_size: Two-element model input size; each element is divided by
            the matching grid dimension to obtain the stride.
            NOTE(review): element order (height/width) is not visible here;
            for equal strides on both axes the order does not matter.

    Returns:
        Tensor of shape ``[batch, n_anchors * grid_h * grid_w, 5 + n_classes]``
        whose last axis is ``[center(2), size(2), confidence(1), classes]``.
    """
    n_anchors = len(anchors)
    inputs = tf.layers.conv2d(
        inputs, filters=n_anchors * (5 + n_classes), kernel_size=1, strides=1)

    # The conv above uses the default channels_last layout, so its output is
    # (batch, height, width, channels): the grid is dims 1 and 2. Taking
    # shape[2:4] would pick up (width, channels) and corrupt the reshape.
    shape = inputs.get_shape().as_list()
    grid_shape = shape[1:3]
    n_cells = grid_shape[0] * grid_shape[1]

    inputs = tf.reshape(inputs, [-1, n_anchors * n_cells, 5 + n_classes])
    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])
    box_centers, box_shapes, confidence, classes = tf.split(
        inputs, [2, 2, 1, n_classes], axis=-1)

    # Build per-cell (x, y) offsets in the same row-major (height, width)
    # order the reshape above used: x runs over columns, y over rows.
    # For square grids this is identical to ranging over either dimension.
    x = tf.range(grid_shape[1], dtype=tf.float32)
    y = tf.range(grid_shape[0], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    # Repeat each cell's offset once per anchor, then flatten to rows.
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides

    # Repeat the anchor list once per grid cell so it lines up row-for-row
    # with box_shapes.
    anchors = tf.tile(anchors, [n_cells, 1])
    box_shapes = tf.exp(box_shapes) * tf.cast(anchors, tf.float32)

    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.sigmoid(classes)

    return tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
# Upsample the feature map using nearest neighbor interpolation
def UpSample(inputs, out_shape):
    """Resize ``inputs`` with nearest-neighbor interpolation.

    Args:
        inputs: Tensor passed to ``tf.image.resize_nearest_neighbor``.
        out_shape: Indexable target size. Element 0 is used as the new
            height and element 1 as the new width, so callers must pass
            ``(height, width)`` rather than a full tensor shape.

    Returns:
        The resized tensor.
    """
    target_size = (out_shape[0], out_shape[1])
    return tf.image.resize_nearest_neighbor(inputs, target_size)
# Main driver function for the entire feature map fusion and prediction
def Main_pipeline(F1, F2, F3, n_classes, model_size):
    """Fuse three feature maps and produce the combined detections.

    Each scale goes through ``Convolution_block`` and ``Detect``. Between
    scales the block's route output is reduced with a 1x1 convolution,
    upsampled to the next feature map's spatial size and concatenated with it
    along the channel axis.

    Args:
        F1: First feature map; processed first and upsampled to F2's size.
        F2: Second feature map; after fusion it is upsampled to F3's size.
        F3: Third feature map.
        n_classes: Number of object classes.
        model_size: Model input size forwarded to ``Detect`` as ``img_size``.

    Returns:
        The three detector outputs concatenated along axis 1.

    Raises:
        ValueError: If ``_ANCHORS`` holds fewer than the nine anchors that
            the three detectors slice out of it.
    """
    if len(_ANCHORS) < 9:
        raise ValueError(
            'Main_pipeline needs 9 anchors in _ANCHORS (3 per detector), '
            'found %d' % len(_ANCHORS))

    # ---- scale 1 ----
    F1, F1_predict = Convolution_block(F1, filters=64)
    Detector1 = Detect(F1_predict, n_classes=n_classes,
                       anchors=_ANCHORS[0:3], img_size=model_size)

    F1 = Conv2d(F1, filters=128, kernel_size=1)
    F1 = BatchNorm(F1)
    F1 = tf.nn.leaky_relu(F1)
    # UpSample expects (height, width). Conv2d uses the default channels_last
    # layout, so those are dims 1 and 2 of the (batch, h, w, c) shape.
    F1 = UpSample(F1, out_shape=F2.get_shape().as_list()[1:3])
    # tf.concat requires an explicit axis; fuse along channels.
    F2 = tf.concat([F1, F2], axis=-1)

    # ---- scale 2 ----
    F2, F2_predict = Convolution_block(F2, filters=128)
    Detector2 = Detect(F2_predict, n_classes=n_classes,
                       anchors=_ANCHORS[3:6], img_size=model_size)

    F2 = Conv2d(F2, filters=256, kernel_size=1)
    F2 = BatchNorm(F2)
    F2 = tf.nn.leaky_relu(F2)
    F2 = UpSample(F2, out_shape=F3.get_shape().as_list()[1:3])
    F3 = tf.concat([F2, F3], axis=-1)

    # ---- scale 3 ----
    # The route output is not needed after the last scale; only the
    # prediction features feed the third detector.
    _, F3_predict = Convolution_block(F3, filters=256)
    Detector3 = Detect(F3_predict, n_classes=n_classes,
                       anchors=_ANCHORS[6:9], img_size=model_size)

    return tf.concat([Detector1, Detector2, Detector3], axis=1)