In [1]:
from tensorflow.keras import layers

In [2]:
# Input image size: 1024 x 1024 pixels with 3 channels
input_shape_img = (1024, 1024, 3)
# Symbolic Keras input tensor that the layers built in later cells are applied to
img_input = layers.Input(shape=input_shape_img)

In [3]:
# 建立cnn(vgg)層，提取特徵
def cnn_base(img_input):
    """Build the VGG16-style convolutional backbone used as feature extractor.

    Five blocks of 3x3, 'same'-padded, ReLU-activated convolutions are
    stacked. Blocks 1-4 are each followed by a 2x2 / stride-2 max pooling
    that halves height and width; block 5 has no pooling. The overall
    downsampling factor is therefore 16 (a 1024x1024x3 input yields a
    64x64x512 feature map).

    Layers are named ``block{i}_conv{j}`` and ``block{i}_pool``.

    Args:
        img_input: Keras tensor holding the input images, e.g. the result
            of ``layers.Input(shape=(1024, 1024, 3))``.

    Returns:
        The Keras tensor produced by the last convolution (``block5_conv3``).
    """
    # One row per block: (block index, number of convs, filters, pool afterwards?)
    block_specs = (
        (1, 2, 64, True),    # 1024x1024 -> 512x512
        (2, 2, 128, True),   # 512x512 -> 256x256
        (3, 3, 256, True),   # 256x256 -> 128x128
        (4, 3, 512, True),   # 128x128 -> 64x64
        (5, 3, 512, False),  # no pooling: feature map stays 64x64x512
    )

    x = img_input
    for block_id, num_convs, num_filters, has_pool in block_specs:
        for conv_id in range(1, num_convs + 1):
            x = layers.Conv2D(filters=num_filters, kernel_size=(3, 3), padding='same', activation='relu',
                              name='block{}_conv{}'.format(block_id, conv_id))(x)
        if has_pool:
            # The pool name is derived from the block index so that every layer
            # name is unique; Keras refuses to build a Model that contains two
            # layers with the same name.
            x = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                    name='block{}_pool'.format(block_id))(x)

    return x

In [18]:
def rpn(base_layers, num_anchors):
    """Attach a Region Proposal Network head to a backbone feature map.

    A shared 3x3 convolution is applied to ``base_layers``; two sibling 1x1
    convolutions then produce the objectness scores and the box regression
    values.

    Args:
        base_layers: Keras tensor with the backbone feature map.
        num_anchors: number of anchors; the classification output gets
            ``num_anchors * 2`` channels and the regression output
            ``num_anchors * 4`` channels.

    Returns:
        Tuple ``(x_class, x_reg, base_layers)``: the classification output,
        the regression output and the unmodified input feature map.
    """
    # Shared 3x3 convolution feeding both output branches.
    shared = layers.Conv2D(
        filters=512,
        kernel_size=(3, 3),
        padding='same',
        activation='relu',
        kernel_initializer='normal',
        name='rpn_conv1',
    )(base_layers)

    # Classification branch: two scores per anchor.
    # NOTE(review): the softmax activation normalises over the whole channel
    # axis (all num_anchors * 2 values together), not per anchor pair --
    # confirm this is what the downstream loss expects.
    class_head = layers.Conv2D(
        filters=num_anchors * 2,
        kernel_size=(1, 1),
        activation='softmax',
        name='rpn_out_class',
    )
    x_class = class_head(shared)

    # Regression branch: four box values per anchor, no non-linearity.
    regress_head = layers.Conv2D(
        filters=num_anchors * 4,
        kernel_size=(1, 1),
        activation='linear',
        name='rpn_out_regress',
    )
    x_reg = regress_head(shared)

    return x_class, x_reg, base_layers

In [19]:
# Build the convolutional backbone on the input tensor; the bare name on the
# last line makes the notebook display the resulting feature-map tensor.
base_layer = cnn_base(img_input)
base_layer

<tf.Tensor 'block5_conv3_4/Identity:0' shape=(None, 64, 64, 512) dtype=float32>

In [20]:
# Attach the RPN head with num_anchors=9, i.e. 9*2=18 classification channels
# and 9*4=36 regression channels.
x_class, x_reg, base_layers = rpn(base_layer, 9)

In [21]:
print('Classification支線：', x_class) # confirm that the depth (channel count) is 18
print('BBOX Regression 支線：', x_reg) # confirm that the depth (channel count) is 36
print('CNN Output：', base_layers)

Classification支線： Tensor("rpn_out_class/Identity:0", shape=(None, 64, 64, 18), dtype=float32)
BBOX Regression 支線： Tensor("rpn_out_regress/Identity:0", shape=(None, 64, 64, 36), dtype=float32)
CNN Output： Tensor("block5_conv3_4/Identity:0", shape=(None, 64, 64, 512), dtype=float32)
