In [10]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

In [11]:
def VGG(input_tensor):
    """
    VGG-style convolutional feature extractor, as used in the CRNN paper.
    Related paper: http://arxiv.org/abs/1507.05717

    Args:
        input_tensor: Keras tensor the convolutional stack is applied to.

    Returns:
        The output tensor of the last convolution (512 filters).
    """
    # Keyword sets shared by several layers below.
    relu_conv = dict(kernel_size=3, padding='same', activation='relu')
    # 2x2 window that strides by 2 along the first spatial axis and by 1
    # along the second one.
    asymmetric_pool = dict(pool_size=(2, 2), strides=(2, 1), padding='same')

    features = input_tensor

    # Two stages of "3x3 conv + ReLU, then 2x2 max-pool".
    for n_filters in (64, 128):
        features = layers.Conv2D(filters=n_filters, **relu_conv)(features)
        features = layers.MaxPool2D(pool_size=2, padding='same')(features)

    # Two 256-filter convolutions followed by the asymmetric pooling.
    for _ in range(2):
        features = layers.Conv2D(filters=256, **relu_conv)(features)
    features = layers.MaxPool2D(**asymmetric_pool)(features)

    # Two 512-filter convolutions; batch normalization sits between each
    # convolution and its ReLU, so the activation is a separate layer here.
    for _ in range(2):
        features = layers.Conv2D(
            filters=512, kernel_size=3, padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation('relu')(features)
    features = layers.MaxPool2D(**asymmetric_pool)(features)

    # Closing 2x2 convolution; no padding argument is given, so the Keras
    # default applies.
    return layers.Conv2D(
        filters=512, kernel_size=2, activation='relu')(features)

In [16]:
def crnn(num_classes, image_width=None, channels=1):
    """
    Build the CRNN model: VGG feature extractor, two stacked bidirectional
    LSTMs and a dense layer applied at every time step.

    Args:
        num_classes: Number of units of the final Dense layer, i.e. the size
            of the score vector produced at each time step.
            NOTE(review): if this model is trained with a CTC loss, the blank
            label presumably has to be counted here -- confirm against the
            training code.
        image_width: Width of the input images. The default ``None`` leaves
            the width unspecified so images of any width can be fed.
        channels: Number of channels of the input images. Defaults to 1.

    Returns:
        A ``keras.Model`` named 'CRNN'. Its output is the raw Dense output
        (no activation is applied) with ``num_classes`` values per time step.
    """
    # The height stays fixed at 32: the Reshape below folds every remaining
    # spatial position into the sequence axis, so it relies on VGG reducing
    # a 32-row input to a single row.
    img_input = keras.Input(shape=(32, image_width, channels))

    x = VGG(img_input)
    # Flatten the feature map into a sequence of 512-dimensional vectors.
    x = layers.Reshape((-1, 512))(x)

    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Dense(units=num_classes)(x)
    return keras.Model(inputs=img_input, outputs=x, name='CRNN')

In [17]:
# Build the network with 26 output units per time step
# (presumably one per letter of the alphabet -- confirm against the label set).
model = crnn(num_classes=26)

In [18]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "CRNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 32, None, 1)]     0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 32, None, 64)      640       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 16, None, 64)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 16, None, 128)     73856     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 8, None, 128)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 8, None, 256)      295168    
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 8, None, 256)      590080 