In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models

def bbox_loss(y_true, y_pred):
    """Mean squared error between target and predicted values.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, subtracted element-wise from ``y_true``.

    Returns:
        A scalar tensor: the mean over all elements of the squared difference.
    """
    residual = y_true - y_pred
    squared_error = tf.square(residual)
    return tf.reduce_mean(squared_error)

def create_detection_model(input_shape, num_classes):
    """Build a bounding-box regressor on top of an ImageNet MobileNetV2 backbone.

    Args:
        input_shape: Shape of one input image, passed straight to MobileNetV2.
        num_classes: Number of classes; the head emits four values per class.

    Returns:
        An uncompiled Keras ``Model`` whose output has ``num_classes * 4``
        sigmoid-activated units.
    """
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
    )

    # Regression head, listed in the order the layers are applied.
    head = (
        layers.GlobalAveragePooling2D(),
        layers.Dense(1024, activation='relu'),
        # 4 outputs per class for the bounding box coordinates
        layers.Dense(num_classes * 4, activation='sigmoid'),
    )

    features = backbone.output
    for layer in head:
        features = layer(features)

    return models.Model(inputs=backbone.input, outputs=features)

# Detector configuration: 128x128 three-channel input.
input_shape = (128, 128, 3)
num_classes = 1  # only one category is detected: the character
detection_model = create_detection_model(
    input_shape=input_shape,
    num_classes=num_classes,
)
detection_model.compile(loss=bbox_loss, optimizer='adam')

In [4]:
def create_crnn_model(imgH, nc, nclass, nh):
    """Build a CRNN: a convolutional stack followed by two bidirectional LSTMs.

    Args:
        imgH: Input image height; the width axis is left unspecified (``None``).
        nc: Number of input image channels.
        nclass: Number of units in the final per-timestep softmax layer.
        nh: Number of units in each LSTM wrapped by ``Bidirectional``.

    Returns:
        An uncompiled Keras ``Model`` from the image input named ``'input'``
        to a sequence of softmax distributions over ``nclass`` classes.
    """
    def conv_relu(filters, kernel_size=(3, 3), padding='same'):
        # Every convolution in this network uses a ReLU activation.
        return layers.Conv2D(filters, kernel_size, padding=padding, activation='relu')

    image_input = layers.Input(shape=(imgH, None, nc), name='input')

    # Convolutional feature extractor, in application order.
    conv_stack = [
        conv_relu(64),
        layers.MaxPooling2D(pool_size=(2, 2)),
        conv_relu(128),
        layers.MaxPooling2D(pool_size=(2, 2)),
        conv_relu(256),
        layers.BatchNormalization(),
        conv_relu(256),
        # (2, 1) pooling shrinks the height only, leaving the width untouched.
        layers.MaxPooling2D(pool_size=(2, 1)),
        conv_relu(512),
        layers.BatchNormalization(),
        conv_relu(512),
        layers.MaxPooling2D(pool_size=(2, 1)),
        conv_relu(512, kernel_size=(2, 2), padding='valid'),
    ]

    # Sequence head: flatten to 512-d steps, two BiLSTMs, then per-step softmax.
    sequence_stack = [
        layers.Reshape(target_shape=(-1, 512)),
        layers.Bidirectional(layers.LSTM(nh, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(nh, return_sequences=True)),
        layers.Dense(nclass, activation='softmax'),
    ]

    tensor = image_input
    for layer in conv_stack + sequence_stack:
        tensor = layer(tensor)

    return models.Model(inputs=image_input, outputs=tensor)

imgH = 32
nc = 1  # number of input image channels (grayscale)
nclass = 10 + 1  # ten digits plus one blank symbol
nh = 256  # number of LSTM hidden units


def ctc_loss(y_true, y_pred):
    """CTC loss usable as a Keras ``loss=`` callable for the CRNN.

    Keras has no built-in loss registered under the string ``'ctc_loss'``,
    so the loss is computed here with ``tf.nn.ctc_loss``.

    Args:
        y_true: Label indices, one row per sample.
            NOTE(review): assumes a dense (batch, label_len) tensor with no
            padding, so every row uses its full width as the label length.
            Padded labels need their true lengths supplied instead. TODO confirm.
        y_pred: Softmax output of the CRNN, shaped (batch, time, nclass).

    Returns:
        A tensor of shape (batch,) holding the per-sample CTC loss.
    """
    labels = tf.cast(y_true, tf.int32)
    batch_size = tf.shape(y_pred)[0]
    logit_length = tf.fill([batch_size], tf.shape(y_pred)[1])
    label_length = tf.fill([batch_size], tf.shape(labels)[1])

    # The model emits probabilities, while tf.nn.ctc_loss expects unnormalised
    # log-scores; log(p) is a valid input because log_softmax(log(p)) == log(p).
    log_probs = tf.math.log(y_pred + tf.keras.backend.epsilon())

    return tf.nn.ctc_loss(
        labels=labels,
        logits=log_probs,
        label_length=label_length,
        logit_length=logit_length,
        logits_time_major=False,
        # NOTE(review): treats the last class (index nclass - 1) as the blank,
        # i.e. digits are labelled 0-9. Confirm against the label encoding.
        blank_index=-1,
    )


crnn_model = create_crnn_model(imgH, nc, nclass, nh)
# Pass the callable: the string 'ctc_loss' cannot be resolved by Keras.
crnn_model.compile(optimizer='adam', loss=ctc_loss)


In [None]:
# Build the object-detection model.
input_shape = (128, 128, 3)
num_classes = 10  # corresponds to the number of characters
detection_model = create_detection_model(input_shape, num_classes)

# Build the CRNN model.
imgH = 32
nc = 1
nclass = 10 + 1
nh = 256
crnn_model = create_crnn_model(imgH, nc, nclass, nh)

CRNN_INPUT_WIDTH = 100  # width every crop is resized to before recognition


def crop_and_resize(images, boxes, crop_height, crop_width):
    """Crop each predicted box out of its image and resize it to a fixed size.

    Args:
        images: Symbolic image batch, (batch, height, width, channels).
        boxes: Detector output, (batch, 4 * boxes_per_image).
            NOTE(review): each group of four values is assumed to be
            [y1, x1, y2, x2] normalised to [0, 1], which is the layout
            ``tf.image.crop_and_resize`` expects. Confirm against the
            detector's training targets and reorder here if they differ.
        crop_height: Output height of every crop.
        crop_width: Output width of every crop.

    Returns:
        A symbolic tensor of shape
        (batch * boxes_per_image, crop_height, crop_width, channels),
        ordered image-major (all boxes of image 0, then image 1, ...).

    Raises:
        ValueError: If the last dimension of ``boxes`` is not a multiple of 4.
    """
    box_values = int(boxes.shape[-1])
    if box_values % 4 != 0:
        raise ValueError(
            f"boxes last dimension must be a multiple of 4, got {box_values}"
        )
    boxes_per_image = box_values // 4

    def _crop(tensors):
        image_batch, flat_boxes = tensors
        # One row per box; rows stay grouped by the image they belong to.
        box_list = tf.reshape(flat_boxes, (-1, 4))
        # Tell TensorFlow which image each box row should be cut from.
        box_indices = tf.repeat(tf.range(tf.shape(image_batch)[0]), boxes_per_image)
        return tf.image.crop_and_resize(
            image_batch, box_list, box_indices, (crop_height, crop_width)
        )

    return layers.Lambda(_crop, name='crop_and_resize')([images, boxes])


# Chain the detector and the CRNN into a single model.
inputs = layers.Input(shape=input_shape)
bboxes = detection_model(inputs)
# Feed the image regions cropped by the predicted boxes into the CRNN.
cropped_images = crop_and_resize(inputs, bboxes, imgH, CRNN_INPUT_WIDTH)
if nc == 1:
    # The detector consumes 3-channel images but the CRNN was built for one
    # channel, so the crops are converted to grayscale first.
    cropped_images = layers.Lambda(
        tf.image.rgb_to_grayscale, name='to_grayscale'
    )(cropped_images)
# Output has one row per (image, box) pair, so its batch size is
# batch * num_classes rather than batch.
crnn_outputs = crnn_model(cropped_images)

combined_model = models.Model(inputs=inputs, outputs=crnn_outputs)