In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models

def bbox_loss(y_true, y_pred):
    """Mean squared error between target and predicted values.

    Args:
        y_true: target values.
        y_pred: predicted values, compatible with `y_true` for subtraction.

    Returns:
        The mean, over all elements, of the squared difference
        ``(y_true - y_pred) ** 2``.
    """
    residual = y_true - y_pred
    return tf.reduce_mean(tf.square(residual))

def create_detection_model(input_shape, num_classes):
    """Build a bounding-box regressor on top of a MobileNetV2 backbone.

    The backbone is created without its classification top and with ImageNet
    weights. Its output is globally average-pooled, passed through a
    1024-unit ReLU layer and finally through a sigmoid layer that emits four
    values per class.

    Args:
        input_shape: shape tuple forwarded to ``MobileNetV2(input_shape=...)``.
        num_classes: number of classes; the output has ``num_classes * 4`` units.

    Returns:
        An uncompiled Keras ``Model`` mapping the backbone input to the
        sigmoid-activated box vector.
    """
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
    )
    pooled = layers.GlobalAveragePooling2D()(backbone.output)
    hidden = layers.Dense(1024, activation='relu')(pooled)
    # Four outputs per class: the bounding-box coordinates.
    boxes = layers.Dense(num_classes * 4, activation='sigmoid')(hidden)
    return models.Model(inputs=backbone.input, outputs=boxes)

# Detector configuration; both values are forwarded to create_detection_model.
input_shape = (400, 300, 3)
num_classes = 1  # only one class is detected: characters
detection_model = create_detection_model(input_shape=input_shape, num_classes=num_classes)
# Train the box regressor with the mean-squared-error loss defined above.
detection_model.compile(loss=bbox_loss, optimizer='adam')

  base_model = tf.keras.applications.MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
def create_crnn_model(imgH, nc, nclass, nh):
    """Build the CRNN recogniser: conv stack -> two BiLSTMs -> per-step softmax.

    Args:
        imgH: height of the input image; the width is left unspecified (None).
        nc: number of input channels.
        nclass: size of the softmax output at every sequence step.
        nh: number of units in each LSTM direction.

    Returns:
        An uncompiled Keras ``Model`` whose input layer is named ``'input'``
        and whose output is a sequence of ``nclass``-way softmax vectors.
    """
    def conv_relu(tensor, filters, kernel_size=(3, 3), padding='same'):
        # Convolution followed by ReLU; 3x3 'same' unless stated otherwise.
        return layers.Conv2D(filters, kernel_size, padding=padding, activation='relu')(tensor)

    def max_pool(tensor, size):
        return layers.MaxPooling2D(pool_size=size)(tensor)

    input_data = layers.Input(shape=(imgH, None, nc), name='input')

    # Convolutional feature extractor. The first two pools shrink both spatial
    # axes; the later (2, 1) pools only shrink the first spatial axis.
    features = conv_relu(input_data, 64)
    features = max_pool(features, (2, 2))
    features = conv_relu(features, 128)
    features = max_pool(features, (2, 2))
    features = conv_relu(features, 256)
    features = layers.BatchNormalization()(features)
    features = conv_relu(features, 256)
    features = max_pool(features, (2, 1))
    features = conv_relu(features, 512)
    features = layers.BatchNormalization()(features)
    features = conv_relu(features, 512)
    features = max_pool(features, (2, 1))
    features = conv_relu(features, 512, kernel_size=(2, 2), padding='valid')

    # Flatten the feature map into a sequence of 512-dimensional vectors.
    sequence = layers.Reshape(target_shape=(-1, 512))(features)
    sequence = layers.Bidirectional(layers.LSTM(nh, return_sequences=True))(sequence)
    sequence = layers.Bidirectional(layers.LSTM(nh, return_sequences=True))(sequence)
    class_probs = layers.Dense(nclass, activation='softmax')(sequence)

    return models.Model(inputs=input_data, outputs=class_probs)

# CRNN hyper-parameters.
imgH = 32
nc = 1  # number of input image channels (grayscale)
nclass = 10 + 1  # ten digits plus one blank symbol
nh = 256  # number of LSTM hidden units

crnn_model = create_crnn_model(imgH=imgH, nc=nc, nclass=nclass, nh=nh)
# NOTE(review): 'ctc_loss' is passed as a string loss identifier and is not
# defined anywhere in this notebook -- confirm the installed Keras version
# resolves this name before training.
crnn_model.compile(loss='ctc_loss', optimizer='adam')

In [None]:
def crop_image(image, bbox):
    """Return the region of `image` covered by `bbox`, clamped to the image.

    Args:
        image: array indexed as ``image[row, col, ...]`` that exposes
            ``.shape`` (for example a NumPy/OpenCV image).
        bbox: four values ``(x_min, y_min, x_max, y_max)`` in pixels, where
            x indexes columns and y indexes rows.

    Returns:
        ``image[y_min:y_max, x_min:x_max]`` after each coordinate has been
        clamped to the image extent. The result is empty when the clamped
        box has no area (for example ``x_max <= x_min``).
    """
    height, width = image.shape[:2]
    x_min, y_min, x_max, y_max = (int(value) for value in bbox)

    # Clamp to the image: a negative slice bound would otherwise be read as
    # "count from the end" and silently select the wrong region.
    x_min = min(max(x_min, 0), width)
    x_max = min(max(x_max, 0), width)
    y_min = min(max(y_min, 0), height)
    y_max = min(max(y_max, 0), height)

    return image[y_min:y_max, x_min:x_max]

In [None]:
import numpy as np

def _to_gray(image):
    """Return a single-channel (H, W) version of a grayscale, BGR or BGRA image."""
    if image.ndim == 2:
        return image
    channels = image.shape[2]
    if channels == 1:
        return image[:, :, 0]
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def _ctc_greedy_decode(class_ids, blank_index):
    """Collapse consecutive repeats, drop blanks, and join the class ids as text."""
    symbols = []
    previous = None
    for class_id in class_ids:
        class_id = int(class_id)
        if class_id != previous and class_id != blank_index:
            symbols.append(str(class_id))
        previous = class_id
    return ''.join(symbols)


def detect_and_recognize(image, detection_model, crnn_model, imgH, imgW, blank_index=None):
    """Locate text regions with `detection_model` and read each with `crnn_model`.

    Args:
        image: 8-bit image as returned by ``cv2.imread``; grayscale ``(H, W)``
            and colour ``(H, W, 3)`` / ``(H, W, 4)`` inputs are accepted.
        detection_model: model whose ``predict`` returns, per image, a flat
            vector that is read as ``(x_min, y_min, x_max, y_max)`` quadruples
            normalised to [0, 1]. Assumes a single channels-last image input,
            i.e. ``input_shape == (batch, height, width, channels)``.
        crnn_model: model whose ``predict`` returns per-step class scores of
            shape ``(batch, steps, classes)`` for a ``(1, imgH, imgW, 1)`` input.
        imgH: height each crop is resized to before recognition.
        imgW: width each crop is resized to before recognition.
        blank_index: class index of the CTC blank symbol. Defaults to the last
            class (assumption: digits use indices 0-9 and the blank is the
            extra class -- confirm against the training label encoding).

    Returns:
        A list with one string per predicted box, in prediction order. A box
        with no area after clamping yields an empty string.
    """
    gray_full = _to_gray(image)
    src_h, src_w = gray_full.shape[:2]

    # Take the detector's expected input size from the model itself; fall back
    # to the notebook's 400x300 configuration if a dimension is unspecified.
    _, det_h, det_w, det_c = detection_model.input_shape
    det_h, det_w, det_c = det_h or 400, det_w or 300, det_c or 1

    # Preprocess: resize, median-blur to denoise, adaptive threshold to remove
    # uneven illumination. Note that cv2.resize takes dsize as (width, height).
    gray = cv2.resize(gray_full, (det_w, det_h))
    gray = cv2.medianBlur(gray, 5)
    adaptive_thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

    input_image = adaptive_thresh.astype('float32') / 255.0
    # Replicate the binary map across the channels the detector expects.
    input_image = np.repeat(input_image[..., np.newaxis], det_c, axis=-1)
    input_image = np.expand_dims(input_image, axis=0)

    # Detect character positions.
    bboxes = detection_model.predict(input_image)[0]
    bboxes = bboxes.reshape(-1, 4)

    # Normalised boxes are mapped back to the original image size.
    scale = np.array([src_w, src_h, src_w, src_h])

    recognized_texts = []
    for bbox in bboxes:
        bbox = (bbox * scale).astype(int)

        # Crop from the grayscale source so the recogniser gets one channel.
        cropped_image = crop_image(gray_full, bbox)
        if cropped_image.size == 0:
            # Degenerate box: cv2.resize would raise on an empty array.
            recognized_texts.append('')
            continue

        cropped_image = cv2.resize(cropped_image, (imgW, imgH))
        cropped_image = cropped_image.astype('float32') / 255.0
        cropped_image = np.expand_dims(cropped_image, axis=-1)
        cropped_image = np.expand_dims(cropped_image, axis=0)

        # Recognise the characters with greedy CTC decoding.
        scores = crnn_model.predict(cropped_image)
        blank = scores.shape[-1] - 1 if blank_index is None else blank_index
        class_ids = np.argmax(scores, axis=2)[0]
        recognized_texts.append(_ctc_greedy_decode(class_ids, blank))

    return recognized_texts

In [None]:
import cv2

# Load the trained detection model and CRNN model.
detection_model = create_detection_model(input_shape, num_classes)
detection_model.load_weights('detection_model_weights.h5')

crnn_model = create_crnn_model(imgH, nc, nclass, nh)
crnn_model.load_weights('crnn_model_weights.h5')

# Read the image.
image_path = 'images/hefei_3188.jpg'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than deep inside OpenCV.
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

# Width each detected crop is resized to before it is passed to the CRNN.
imgW = 100

# Run character detection and recognition with the combined models.
recognized_texts = detect_and_recognize(image, detection_model, crnn_model, imgH, imgW)

print(f'Recognized Texts: {recognized_texts}')