In [1]:
import mxnet as mx

In [2]:
# Load the symbol graph plus argument/auxiliary parameters stored for epoch 39 of the checkpoint.
sym, arg_params, aux_params = mx.model.load_checkpoint("cnocr-v1.2.0-densenet-lite-gru", 39)

# One input called 'data', bound with shape (1, 1, 32, 280)
# (presumably batch, channel, height, width — confirm against the cnocr docs).
data_names = ['data']
data_shapes = [(data_names[0], (1, 1, 32, 280))]

# Take the internal node 'pred_fc_output' and put a softmax on top of it.
# Note that `sym` is rebound here: from now on it is the truncated graph, not the loaded one.
pred_fc = sym.get_internals()['pred_fc_output']
sym = mx.sym.softmax(data=pred_fc)

context = mx.cpu()

# Inference-only module without labels, filled with the checkpoint parameters.
mod = mx.mod.Module(
    symbol=sym, context=context, data_names=data_names, label_names=None
)
mod.bind(for_training=False, data_shapes=data_shapes)
mod.set_params(arg_params, aux_params, allow_missing=False)

In [3]:
# Print name and shape of every MXNet parameter: first the entries of
# get_params()[0], then the entries of get_params()[1].
for key, value in [*mod.get_params()[0].items(), *mod.get_params()[1].items()]:
    print(key, value.shape)

densenet0_stage0_conv0_weight (32, 1, 3, 3)
densenet0_stage0_batchnorm0_gamma (32,)
densenet0_stage0_batchnorm0_beta (32,)
densenet0_stage0_conv1_weight (64, 32, 3, 3)
densenet0_batchnorm0_gamma (65,)
densenet0_batchnorm0_beta (65,)
densenet0_conv0_weight (64, 65, 1, 1)
densenet0_stage1_batchnorm0_gamma (64,)
densenet0_stage1_batchnorm0_beta (64,)
densenet0_stage1_conv0_weight (64, 64, 1, 1)
densenet0_stage1_batchnorm1_gamma (64,)
densenet0_stage1_batchnorm1_beta (64,)
densenet0_stage1_conv1_weight (32, 64, 3, 3)
densenet0_stage1_batchnorm2_gamma (96,)
densenet0_stage1_batchnorm2_beta (96,)
densenet0_stage1_conv2_weight (64, 96, 1, 1)
densenet0_stage1_batchnorm3_gamma (64,)
densenet0_stage1_batchnorm3_beta (64,)
densenet0_stage1_conv3_weight (32, 64, 3, 3)
densenet0_batchnorm1_gamma (128,)
densenet0_batchnorm1_beta (128,)
densenet0_conv1_weight (128, 128, 1, 1)
densenet0_stage2_batchnorm0_gamma (128,)
densenet0_stage2_batchnorm0_beta (128,)
densenet0_stage2_conv0_weight (128, 128, 1, 1

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, Model, Input

In [5]:
def get_model(shape):   # (1, 32, None)
    """Assemble the Keras DenseNet-lite + bidirectional GRU recognition network.

    Every convolution, pooling and batch-normalisation layer works
    channels-first (channel axis 1) and every convolution is bias-free.
    Layers are instantiated in exactly the order in which data flows through
    them.

    :param shape: per-sample input shape handed to ``Input``; the notebook
        calls this with ``(1, 32, None)``.
    :return: a ``Model`` mapping the input to per-time-step softmax scores
        over 6426 classes.
    """
    def bn_relu(tensor):
        # Batch normalisation over the channel axis, then ReLU.
        tensor = layers.BatchNormalization(axis=1, momentum=0.9, epsilon=0.00001)(tensor)
        return layers.ReLU()(tensor)

    def conv(tensor, filters, kernel_size, strides=(1, 1), padding="same", groups=1):
        # Bias-free channels-first convolution without dilation.
        return layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                             padding=padding, data_format="channels_first",
                             dilation_rate=(1, 1), groups=groups, use_bias=False)(tensor)

    def max_pool(tensor, window):
        # Non-overlapping max pooling: the stride equals the pooling window.
        return layers.MaxPool2D(pool_size=window, strides=window, padding="valid",
                                data_format="channels_first")(tensor)

    def dense_unit(tensor, bottleneck, growth):
        # BN-ReLU-1x1 conv, BN-ReLU-3x3 conv, then concatenate with the unit's input.
        branch = conv(bn_relu(tensor), bottleneck, (1, 1))
        branch = conv(bn_relu(branch), growth, (3, 3))
        return layers.Concatenate(axis=1)([tensor, branch])

    def transition(tensor, filters):
        # BN-ReLU-1x1 conv (Keras default "valid" padding) followed by 2x2 max pooling.
        squeezed = conv(bn_relu(tensor), filters, (1, 1), padding="valid")
        return max_pool(squeezed, (2, 2))

    inputs = Input(shape=shape)

    # Stem: two 3x3 convolutions whose result is concatenated with the raw input.
    stem = conv(inputs, 32, (3, 3))
    stem = conv(bn_relu(stem), 64, (3, 3))
    net = layers.Concatenate(axis=1)([inputs, stem])

    # First transition and the two dense units that follow it.
    net = transition(net, 64)
    net = dense_unit(net, 64, 32)
    net = dense_unit(net, 64, 32)

    # Second transition and the two dense units that follow it.
    net = transition(net, 128)
    net = dense_unit(net, 128, 64)
    net = dense_unit(net, 128, 64)

    # Tail: 1x1 conv, ReLU, grouped (2, 3) conv with stride (2, 1), BN-ReLU, (2, 1) pooling.
    net = conv(bn_relu(net), 256, (1, 1))
    net = layers.ReLU()(net)
    net = conv(net, 256, (2, 3), strides=(2, 1), groups=256)
    net = bn_relu(net)
    net = max_pool(net, (2, 1))

    # Collapse to (512, steps) and swap the axes so the sequence axis comes first.
    net = layers.Reshape([512, -1])(net)
    net = layers.Permute([2, 1])(net)

    # Sequence model and per-step classifier.
    net = layers.Bidirectional(layers.GRU(128, return_sequences=True))(net)
    net = layers.Dense(6426)(net)
    net = layers.Softmax()(net)

    return Model(inputs, net)

In [6]:
# Instantiate the Keras network with the per-sample input shape (1, 32, None).
model = get_model((1, 32, None))
# NOTE(review): get_model already returns a Model created from an Input tensor, so this
# build() call is presumably redundant — confirm before removing it.
model.build((1, 32, None))

In [7]:
# List every Keras weight with its shape so it can be compared with the MXNet parameter listing.
for each in model.weights:
    print(each.name, each.shape)

conv2d/kernel:0 (3, 3, 1, 32)
batch_normalization/gamma:0 (32,)
batch_normalization/beta:0 (32,)
batch_normalization/moving_mean:0 (32,)
batch_normalization/moving_variance:0 (32,)
conv2d_1/kernel:0 (3, 3, 32, 64)
batch_normalization_1/gamma:0 (65,)
batch_normalization_1/beta:0 (65,)
batch_normalization_1/moving_mean:0 (65,)
batch_normalization_1/moving_variance:0 (65,)
conv2d_2/kernel:0 (1, 1, 65, 64)
batch_normalization_2/gamma:0 (64,)
batch_normalization_2/beta:0 (64,)
batch_normalization_2/moving_mean:0 (64,)
batch_normalization_2/moving_variance:0 (64,)
conv2d_3/kernel:0 (1, 1, 64, 64)
batch_normalization_3/gamma:0 (64,)
batch_normalization_3/beta:0 (64,)
batch_normalization_3/moving_mean:0 (64,)
batch_normalization_3/moving_variance:0 (64,)
conv2d_4/kernel:0 (3, 3, 64, 32)
batch_normalization_4/gamma:0 (96,)
batch_normalization_4/beta:0 (96,)
batch_normalization_4/moving_mean:0 (96,)
batch_normalization_4/moving_variance:0 (96,)
conv2d_5/kernel:0 (1, 1, 96, 64)
batch_normalizatio

### 验证输入输出

In [8]:
from collections import namedtuple
import numpy as np

In [9]:
# Random batch of 16 inputs of shape (1, 32, 280) fed to both frameworks.
# A locally seeded generator makes the comparison reproducible across kernel
# restarts without touching NumPy's global random state.
data = np.random.RandomState(0).random_sample((16, 1, 32, 280)).astype(np.float32)

In [10]:
def transport_each(mx_layer_name, mx_model):
    """Convert the parameters of one MXNet layer into Keras weight order.

    The layer kind is inferred from substrings of ``mx_layer_name``; the
    checks run in this order: ``"conv"``, ``"batchnorm"``, ``"gru"``
    together with ``"i2h"``, ``"pred_fc"``.

    :param mx_layer_name: MXNet parameter name without its last ``_<suffix>``
        part, e.g. ``densenet0_stage0_conv0``.
    :param mx_model: object whose ``get_params()`` returns the pair
        ``(arg_params, aux_params)`` of dicts whose values offer ``asnumpy()``.
    :return: list of weights for this layer, or an empty list when the name
        matches none of the known kinds:
        conv -> ``[kernel]``;
        batchnorm -> ``[gamma, beta, running_mean, running_var]``;
        gru -> ``[kernel, recurrent_kernel, bias]``;
        pred_fc -> ``[kernel, bias]``.
    :raises KeyError: if a parameter expected for the detected kind is missing.
    """
    # Fetch both parameter dicts once instead of calling get_params() again
    # for every single tensor (the original did so up to four times per layer).
    arg_params, aux_params = mx_model.get_params()

    if "conv" in mx_layer_name:
        mx_weight = arg_params[mx_layer_name + "_weight"].asnumpy()
        # Move the two kernel axes to the front and swap the channel axes,
        # e.g. (32, 1, 3, 3) in MXNet becomes (3, 3, 1, 32) in Keras.
        return [tf.transpose(mx_weight, perm=[2, 3, 1, 0])]

    if "batchnorm" in mx_layer_name:
        mx_gamma = arg_params[mx_layer_name + "_gamma"].asnumpy()
        mx_beta = arg_params[mx_layer_name + "_beta"].asnumpy()
        # The running statistics are stored in the auxiliary parameter dict.
        mx_running_mean = aux_params[mx_layer_name + "_running_mean"].asnumpy()
        mx_running_var = aux_params[mx_layer_name + "_running_var"].asnumpy()
        return [mx_gamma, mx_beta, mx_running_mean, mx_running_var]

    if "gru" in mx_layer_name and "i2h" in mx_layer_name:
        # Drop the trailing "_i2h" (4 characters) to get the prefix shared by
        # the four tensors of this GRU direction.  "h2h" names are ignored by
        # this branch because their tensors are consumed here as well.
        prefix = mx_layer_name[:-4]
        i2h_weight = arg_params[prefix + "_i2h_weight"].asnumpy()
        h2h_weight = arg_params[prefix + "_h2h_weight"].asnumpy()
        i2h_bias = arg_params[prefix + "_i2h_bias"].asnumpy()
        h2h_bias = arg_params[prefix + "_h2h_bias"].asnumpy()

        i2h_weight = tf.transpose(i2h_weight, [1, 0])   # (512, 384)
        h2h_weight = tf.transpose(h2h_weight, [1, 0])   # (128, 384)

        # Split into the three gate blocks in MXNet order (r, z, h) ...
        Wr, Wz, Wh = tf.split(i2h_weight, 3, axis=1)   # (512, 128)
        Rr, Rz, Rh = tf.split(h2h_weight, 3, axis=1)
        Wbr, Wbz, Wbh = tf.split(i2h_bias, 3, axis=0)   # (128,)
        Rbr, Rbz, Rbh = tf.split(h2h_bias, 3, axis=0)

        # ... and reassemble them in the order (z, r, h) used for the Keras GRU.
        tf_kernel = tf.concat([Wz, Wr, Wh], axis=1)
        tf_recurrent_kernel = tf.concat([Rz, Rr, Rh], axis=1)
        # Row 0 holds the input bias, row 1 the recurrent bias.
        tf_bias = tf.stack(
            [tf.concat([Wbz, Wbr, Wbh], axis=0), tf.concat([Rbz, Rbr, Rbh], axis=0)],
            axis=0,
        )
        return [tf_kernel, tf_recurrent_kernel, tf_bias]

    if "pred_fc" in mx_layer_name:
        mx_weight = arg_params[mx_layer_name + "_weight"].asnumpy()
        mx_bias = arg_params[mx_layer_name + "_bias"].asnumpy()
        # Swap the two weight axes for the Keras Dense kernel.
        return [tf.transpose(mx_weight, [1, 0]), mx_bias]

    # Unknown layer kind (this includes the "h2h" half of a GRU): nothing to add.
    return []

In [11]:
def dedup(a):
    """Return the distinct items of ``a`` in order of first appearance.

    Runs in a single pass with a set of already-seen items instead of sorting
    by ``a.index`` (which rescans the input for every item), and therefore
    also accepts any iterable, not only sequences.

    :param a: iterable of hashable items; it is not modified.
    :return: new list containing each distinct item once, ordered by its
        first occurrence in ``a``.
    """
    seen = set()
    unique = []
    for item in a:
        if item not in seen:
            seen.add(item)
            unique.append(item)

    return unique

In [12]:
def transport_weights(mx_model):
    """Collect the converted weights of every MXNet layer into one flat list.

    Layer names are obtained by stripping the last ``_``-separated part from
    each key of ``mx_model.get_params()[0]`` and de-duplicating the result
    while keeping first-seen order; each layer is then converted with
    ``transport_each``.

    :param mx_model: MXNet module whose parameters are converted.
    :return: flat list with the per-layer outputs of ``transport_each``,
        concatenated in layer order.
    """
    layer_names = dedup(
        ["_".join(param_name.split("_")[:-1]) for param_name in mx_model.get_params()[0].keys()]
    )

    converted = []
    for layer_name in layer_names:
        converted += transport_each(layer_name, mx_model)

    return converted

In [13]:
# Copy the converted MXNet parameters into the Keras model. The list is positional,
# so it relies on transport_weights yielding tensors in the same order as model.weights.
model.set_weights(transport_weights(mod))

In [17]:
# Forward the random batch through the Keras model.
y_hat = model(data)

In [18]:
# Swap the first two axes and flatten to (-1, 6426) so the result has the same
# 2-D layout as the MXNet output it is compared with below.
# NOTE(review): this cell rebinds y_hat, so it is not idempotent — re-run the
# preceding cell before executing it a second time.
y_hat = tf.transpose(y_hat, [1, 0, 2])
y_hat = tf.reshape(y_hat, (-1, 6426))

In [19]:
# Shape of the flattened Keras prediction.
y_hat.shape

TensorShape([1120, 6426])

In [20]:
# Load the same checkpoint again into a second, independent module that is bound
# to a batch of 16; it serves as the MXNet reference for the comparison.
sym_test, arg_params_test, aux_params_test = mx.model.load_checkpoint("cnocr-v1.2.0-densenet-lite-gru", 39)

data_names_test = ['data']
data_shapes_test = [(data_names_test[0], (16, 1, 32, 280))]
context_test = mx.cpu()

# Alternative tap point (GRU output), kept for inspecting intermediate activations:
# pool0_fwd_output_test = sym_test.get_internals()['gru0_rnn0_output']
# NOTE: despite its name, this variable holds the 'softmaxactivation0_output' node.
pool0_fwd_output_test = sym_test.get_internals()['softmaxactivation0_output']

# Inference-only module without labels, filled with the checkpoint parameters.
mod_test = mx.mod.Module(
    symbol=pool0_fwd_output_test, context=context_test, data_names=data_names_test, label_names=None
)
mod_test.bind(for_training=False, data_shapes=data_shapes_test)
mod_test.set_params(arg_params_test, aux_params_test, allow_missing=False)

In [21]:
# Minimal batch container with a single `data` field, handed to Module.forward.
Batch = namedtuple("Batch", ["data"])
# Run the MXNet reference on the same random batch that was fed to the Keras model.
mod_test.forward(Batch([mx.nd.array(data)]))

In [22]:
# First (and only requested) output of the MXNet reference module.
y_hat_mx = mod_test.get_outputs()[0]

In [23]:
# Shape of the MXNet prediction; it should equal the shape of the flattened Keras prediction.
y_hat_mx.shape

(1120, 6426)

In [24]:
# Element-wise difference between the Keras and MXNet outputs; values close to
# zero indicate that the weight transfer reproduced the original network.
np.array(y_hat) - y_hat_mx.asnumpy()

array([[-3.54647636e-06, -6.60656951e-09, -8.68567440e-11, ...,
        -1.48361323e-11, -1.06226139e-12, -3.46944695e-16],
       [-1.81794167e-05, -1.25055521e-11, -1.12132525e-13, ...,
        -6.22765728e-16, -1.33226763e-13, -5.09141340e-16],
       [-3.69548798e-05, -6.98491931e-10, -2.06057393e-12, ...,
        -5.57065505e-12, -7.44293516e-13, -1.87566976e-17],
       ...,
       [-1.31130219e-06, -6.48786580e-16, -8.67361738e-18, ...,
        -3.07642366e-18, -1.98729921e-14, -4.04190570e-16],
       [-8.34465027e-07, -3.60822483e-16,  1.12323345e-16, ...,
         1.21972744e-18, -1.38777878e-16,  5.69206141e-19],
       [-4.76837158e-07, -2.45463372e-16, -2.03287907e-18, ...,
        -1.02999206e-18, -1.38777878e-16, -4.22838847e-18]], dtype=float32)

In [25]:
# Persist the Keras model with the transferred weights as an HDF5 file.
model.save("densenet-lite-gru.h5")





### 使用样本测试

In [32]:
from cnocr.fit.ctc_metrics import CtcMetrics
import cv2

In [33]:
def read_charset(charset_fp):
    """Read the character table used to decode predicted class ids.

    :param charset_fp: path of a UTF-8 text file with one symbol per line.
    :return: list whose element 0 is ``None`` and whose following elements
        are the lines of the file (trailing newline removed) in file order.
        The first entry equal to ``'<space>'``, if there is one, is replaced
        by a single blank.
    """
    # Element 0 is a reserved id that CTC uses to separate characters; it does
    # not correspond to any meaningful character.
    alphabet = [None]
    with open(charset_fp, encoding='utf-8') as fp:
        alphabet.extend(line.rstrip('\n') for line in fp)

    # Only the first '<space>' entry is mapped to a blank; a charset without
    # such an entry is returned unchanged.
    if '<space>' in alphabet:
        alphabet[alphabet.index('<space>')] = ' '

    return alphabet

In [34]:
def gen_line_pred_chars(line_prob, img_width, max_img_width, alphabet, comp_ratio=8):
    """
    Get the predicted characters.
    :param line_prob: with shape of [seq_length, num_classes]
    :param img_width: width of this image; may be an int or a float
    :param max_img_width: width the image was padded to; when ``img_width`` is
        smaller, the time steps beyond the real image are forced to class 0
    :param alphabet: sequence mapping a class id to its character
    :param comp_ratio: number of input columns per output time step.
        NOTE(review): the default 8 is the value that used to be hard-coded
        here; the network built in this notebook appears to emit 70 steps for
        a width of 280 (ratio 4) — confirm which value is right.
    :return: list of predicted characters, with ``'<space>'`` entries of the
        alphabet rendered as a single blank
    """
    class_ids = np.argmax(line_prob, axis=-1)

    if img_width < max_img_width:
        # int(): img_width may be a float (e.g. after rescaling the image),
        # and a float cannot be used as a slice index.
        end_idx = int(img_width // comp_ratio)
        if end_idx < len(class_ids):
            class_ids[end_idx:] = 0
    # The second element returned by ctc_label is not needed here.
    prediction, _ = CtcMetrics.ctc_label(class_ids.tolist())

    return [alphabet[p] if alphabet[p] != '<space>' else ' ' for p in prediction]

In [35]:
# Character table used to turn predicted class ids into text.
alphabet = read_charset("label_cn.txt")

In [39]:
test_img_path = "../zzb0001.jpg"
# Flag 0 loads the image as single-channel grayscale.
test_img = cv2.imread(test_img_path, 0)
if test_img is None:
    # cv2.imread does not raise on a missing or unreadable file; it returns None.
    raise FileNotFoundError("cv2.imread could not read image: " + test_img_path)

# Rescale to a height of 32 while keeping the aspect ratio.
height, width = test_img.shape
rate = 32 / height
# Keep the width an integer so it can be used directly as a size or an index later on.
width = int(width * rate)
test_img = cv2.resize(test_img, (width, 32))

# Add two leading axes of size 1 -> (1, 1, 32, width), then scale pixel values to [0, 1].
test_img = np.expand_dims(test_img, axis=0)
test_img = np.expand_dims(test_img, axis=0)
test_img = test_img.astype(np.float32)
test_img /= 255.

In [43]:
# Run the Keras model on the single test image and keep the output of the only sample in the batch.
line_prob = model(test_img)[0]

In [45]:
# Decode the per-step scores into text. Passing the same width twice skips the
# padding-truncation branch of gen_line_pred_chars.
"".join(gen_line_pred_chars(line_prob, width, width, alphabet))

'山西云通软件省管干部年度考核测评表'