Взяли модель и набор данных с моей работы. Модель выполняет OCR (распознавание текста на изображениях).

In [None]:
from google.colab import drive


# Mount Google Drive at /content/drive; the dataset and model paths used
# later in this notebook live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


# Берем и смотрим данные

In [None]:
import h5py
import pandas as pd

# Read the image array and the per-image flag array fully into memory.
# The mode is given explicitly as read-only: h5py releases before 3.0 could
# open the file writable when no mode was passed, and this dataset must
# never be modified by the notebook.
with h5py.File('/content/drive/MyDrive/Методы компрессии/common_fields_images.h5', 'r') as h5_file:
    images = h5_file['images'][:]
    additional_bits = h5_file['additional_bit'][:]

# The labels file is cp1251-encoded, one label per line; surrounding
# whitespace (including the newline) is stripped from every line.
with open('/content/drive/MyDrive/Методы компрессии/common_fields_labels.txt', encoding='cp1251') as labels_file:
    markup = [line.strip() for line in labels_file]

In [None]:
# Quick consistency check: array shapes and the number of label lines.
images.shape, additional_bits.shape, len(markup)

((16462, 32, 400), (16462,), 16462)

# Кодируем данные и подгоняем размерности

In [None]:
import pandas as pd
import numpy as np

def encode_texts(texts):
    """
    Encode strings as a right-padded integer matrix over their own alphabet.

    Args:
        texts: Sequence of strings to encode.

    Returns:
        A ``(nums, alphabet)`` tuple. ``alphabet`` is a string made of every
        distinct character found in ``texts``, in sorted order. ``nums`` is
        an int64 array of shape ``(len(texts), length of the longest text)``
        whose rows hold the alphabet index of each character; unused trailing
        positions hold ``len(alphabet)`` as the padding value. An empty
        ``texts`` yields an array of shape ``(0, 0)`` and an empty alphabet.
    """
    # A set comprehension collects the distinct characters directly instead
    # of building one pandas Series per text.
    alphabet = ''.join(sorted({ch for text in texts for ch in text}))
    # Constant-time lookup instead of a linear str.find per character.
    char_to_index = {ch: idx for idx, ch in enumerate(alphabet)}

    # default=0 keeps an empty input from raising inside max().
    max_len = max((len(text) for text in texts), default=0)
    nums = np.full((len(texts), max_len), len(alphabet), dtype='int64')
    for row, text in zip(nums, texts):
        # `row` is a view into `nums`, so this writes in place.
        row[:len(text)] = [char_to_index[ch] for ch in text]

    return nums, alphabet

In [None]:
# Encode the label strings into a padded integer matrix and keep the
# alphabet that was derived from them.
labels_encoded, alphabet = encode_texts(markup)

In [None]:
# Convert pixels to float and divide by 255.
# NOTE(review): presumably the stored images are 8-bit, which would map
# them to [0, 1] — confirm against the dataset.
images = images.astype('float64') / 255

# One-hot encode each sample's flag along the last axis and repeat it over
# the 50 positions of the middle axis.
# The row index must be paired with the flag value: indexing with
# `[:, :, additional_bits]` would apply every flag value to every row and
# fill both channels with ones as soon as both values occur in the data.
# NOTE(review): if the saved model was trained on inputs built the old way,
# re-check its metrics after this fix.
additional_bits_expanded = np.zeros((len(images), 50, 2))
sample_idx = np.arange(len(images))
additional_bits_expanded[sample_idx, :, additional_bits] = 1

In [None]:
# Display the shape of the expanded flag array.
additional_bits_expanded.shape

(16462, 50, 2)

# Функция потерь, метрика и модель

In [None]:
import tensorflow as tf
from keras import backend as K


class CER(tf.keras.metrics.Metric):
    """
    A custom Keras metric to compute the Character Error Rate (CER).

    Every batch of predictions is CTC-decoded and compared with the
    reference labels through a normalized edit distance; the metric value is
    the sum of per-sample distances divided by the number of samples seen
    since the last reset.

    Args:
        name: Metric name reported by Keras.
        decode_greedy: If True, decode with greedy search; otherwise use the
            backend's beam search with its default settings.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """
    def __init__(self, name='CER', decode_greedy=True, **kwargs):
        super().__init__(name=name, **kwargs)
        self.decode_greedy = decode_greedy
        # Running sum of per-sample normalized edit distances.
        self.cer_accumulator = self.add_weight(name="total_cer", initializer="zeros")
        # Number of samples accumulated so far.
        self.counter = self.add_weight(name="cer_count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """
        Accumulate the edit distances of one batch.

        Args:
            y_true: Dense 2-D batch of reference label indices.
            y_pred: 3-D batch of predictions passed to ``K.ctc_decode``.
            sample_weight: Accepted for API compatibility; not used.
        """
        pred_shape = K.shape(y_pred)
        # Every sample is decoded over the full second axis of y_pred.
        input_length = tf.fill([pred_shape[0]], pred_shape[1])

        # Honour the constructor flag instead of always decoding greedily.
        decoded, _ = K.ctc_decode(y_pred, input_length, greedy=self.decode_greedy)

        decoded_sparse = K.ctc_label_dense_to_sparse(decoded[0], input_length)
        # Drop the -1 filler entries present in the dense decoder output.
        decoded_sparse = tf.sparse.retain(
            decoded_sparse, tf.not_equal(decoded_sparse.values, -1)
        )

        y_true_sparse = K.ctc_label_dense_to_sparse(y_true, input_length)
        # NOTE(review): the largest value in the batch is treated as the
        # padding token. This presumably relies on padding being the highest
        # index and present in every batch; otherwise a real character is
        # removed — confirm against the label encoding.
        padding_value = tf.math.reduce_max(y_true_sparse.values)
        y_true_sparse = tf.sparse.retain(
            y_true_sparse, tf.not_equal(y_true_sparse.values, padding_value)
        )

        distance = tf.edit_distance(decoded_sparse, y_true_sparse, normalize=True)

        self.cer_accumulator.assign_add(tf.reduce_sum(distance))
        # tf.shape works for symbolic tensors as well as eager ones/arrays,
        # unlike the builtin len().
        self.counter.assign_add(K.cast(tf.shape(y_true)[0], 'float32'))

    def result(self):
        """Return the mean distance per sample, or 0 if nothing was seen."""
        return tf.math.divide_no_nan(self.cer_accumulator, self.counter)

    def reset_state(self):
        """Clear the accumulated distance sum and sample count."""
        self.cer_accumulator.assign(0.0)
        self.counter.assign(0.0)

    def get_config(self):
        """Return the config including ``decode_greedy`` for serialization."""
        config = super().get_config()
        config['decode_greedy'] = self.decode_greedy
        return config


def CTCLoss(y_true, y_pred):
    """
    Compute the CTC loss used at training time.

    Every sample is given the same input length (second dimension of
    ``y_pred``) and the same label length (second dimension of ``y_true``).

    Args:
        y_true: Dense 2-D batch of label indices.
        y_pred: 3-D batch of predictions.

    Returns:
        The tensor returned by ``K.ctc_batch_cost`` for the batch.
    """
    true_shape = tf.shape(y_true)
    pred_shape = tf.shape(y_pred)

    batch_size = tf.cast(true_shape[0], dtype="int64")
    time_steps = tf.cast(pred_shape[1], dtype="int64")
    label_width = tf.cast(true_shape[1], dtype="int64")

    # Column vectors of shape (batch, 1) holding the same length per sample.
    input_lengths = time_steps * tf.ones(shape=(batch_size, 1), dtype="int64")
    label_lengths = label_width * tf.ones(shape=(batch_size, 1), dtype="int64")

    return K.ctc_batch_cost(y_true, y_pred, input_lengths, label_lengths)

In [None]:
# Load the saved Keras model; the custom loss and metric are passed by name
# through custom_objects so the saved model can be deserialized.
model = tf.keras.models.load_model('/content/drive/MyDrive/Методы компрессии/crnn_common_fields.h5', custom_objects={'CTCLoss': CTCLoss, 'CER': CER})

In [None]:
# Print the layer-by-layer summary of the loaded model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 32, 400, 1)]         0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 32, 400, 32)          320       ['input_2[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 32, 400, 32)          128       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 leaky_re_lu (LeakyReLU)     (None, 32, 400, 32)          0         ['batch_normalization[0][0

Архитектура модели, если интересно, ниже. Чтобы увеличить изображение, можно поменять значение height в атрибуте style картинки.

<img src="crnn_common_fields.png" alt="Архитектура модели CRNN" style="height:1000px"/>

# Непосредственно замер

Не уверен, как правильно замерять скорость inference в TF, оставлю так пока

In [None]:
import time
from tqdm import trange

NRUNS = 10   # number of timed passes over the benchmark subset
DLEN = 1280  # number of samples in the benchmark subset

# Build the model inputs once so that slicing is not part of the timed loop.
bench_inputs = [additional_bits_expanded[:DLEN], images[:DLEN]]

# Untimed warm-up pass: the first predict() call typically includes one-off
# setup work (function tracing, memory allocation) that would otherwise
# inflate the average.
y_pred = model.predict(bench_inputs, batch_size=64, verbose=0)

# perf_counter is monotonic and high-resolution, unlike time.time().
start = time.perf_counter()

for _ in trange(NRUNS):
    y_pred = model.predict(bench_inputs, batch_size=64, verbose=0)

elapsed = time.perf_counter() - start

# Average wall-clock seconds per sample.
print(f'Time spent: {elapsed / NRUNS / DLEN:.6f}')

loss = CTCLoss(labels_encoded[:DLEN], y_pred)
cer = CER()
cer.update_state(labels_encoded[:DLEN], y_pred)

# Convert to Python floats and format with a fixed precision: round() on a
# NumPy float32 prints binary-representation noise instead of six decimals.
mean_loss = float(tf.reduce_mean(loss))
cer_value = float(cer.result())
print(f'loss: {mean_loss:.6f}, Character Error Rate: {cer_value:.6f}')

100%|██████████| 10/10 [05:36<00:00, 33.61s/it]


Time spent: 0.02626
loss: 0.007197999861091375, Character Error Rate: 0.0


Для валидации правильно использовать весь датасет, так как на тренировочной части ошибка и правда может быть нулевой.