In [1]:
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras import layers
import numpy as np

In [24]:
# Model Structure: 5 CNN
# 2 5x5 kernel, 3 3x3 kernels
# Non-linear RELU
# MaxPool

def backend_reshape(x):
    return tf.keras.backend.reshape(x, (32,-1,80))


def ctc_loss_fn(y_true, y_pred):
    print("Y Prediction: ", y_pred.shape)
    """
    y_pred = tf.transpose(y_pred, [1, 0, 2])

    if len(y_true.shape) > 2:
        y_true = tf.squeeze(y_true)

    # y_pred.shape = (batch_size, string_length, alphabet_size_1_hot_encoded)
    # output of every model is softmax
    # so sum across alphabet_size_1_hot_encoded give 1
    #               string_length give string length
    input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
    input_length = tf.math.reduce_sum(input_length, axis=-1, keepdims=True)
    
    
    # y_true strings are padded with 0
    # so sum of non-zero gives number of characters in this string
    label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64")
    
    
    
    loss = tf.keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

    # average loss across all entries in the batch
    loss = tf.reduce_mean(loss)
    """
    return 

model = K.Sequential(
    [
        
        layers.Conv2D(filters=32, kernel_size = 5, strides=1, padding="same", input_shape = (128, 32,1), activation="relu"),
        layers.MaxPooling2D(pool_size=2, strides = (2,2)),
        
        layers.Conv2D(filters=64, kernel_size = 5, strides=1, padding="same", activation="relu"),
        layers.MaxPooling2D(pool_size=2, strides = (2,2)),
        
        layers.Conv2D(filters=128, kernel_size = 3, strides=1, padding="same", activation="relu"),
        layers.MaxPooling2D(pool_size=(1,2), strides = (1,2)),
        
        layers.Conv2D(filters=128, kernel_size = 3, strides=1, padding="same", activation="relu"),
        layers.MaxPooling2D(pool_size=(1,2), strides = (1,2)),
        
        layers.Conv2D(filters=256, kernel_size = 3, strides=1, padding="same", activation="relu"),
        layers.MaxPooling2D(pool_size=(1,2), strides = (1,2)),
        
        layers.Reshape((32,256)),
        
        layers.Bidirectional(layers.LSTM(256, return_sequences=True)),
        
        layers.Reshape((32,1, 512)),
        
        layers.Conv2D(filters=80, kernel_size = 3, padding="same", dilation_rate=1, activation="softmax"),
        layers.Reshape((32,80)),
        
        # layers.Lambda(backend_reshape)
    ]
)

model.summary()
model.compile(optimizer="rmsprop", loss=ctc_loss_fn, metrics=['accuracy'])


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_36 (Conv2D)           (None, 128, 32, 32)       832       
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 64, 16, 32)        0         
_________________________________________________________________
conv2d_37 (Conv2D)           (None, 64, 16, 64)        51264     
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 32, 8, 64)         0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 32, 8, 128)        73856     
_________________________________________________________________
max_pooling2d_32 (MaxPooling (None, 32, 4, 128)        0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 32, 4, 128)       

In [3]:
# some_file.py
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '/SimpleHTR')

from SimpleHTR.src.DataLoader import DataLoader
loader =  DataLoader(filePath='./SimpleHTR/data/', batchSize=50, imgSize=(128,32), maxTextLen=32)

In [4]:
	def toSparse(texts, charList):
		"put ground truth texts into sparse tensor for ctc_loss"
		indices = []
		values = []
		shape = [len(texts), 0] # last entry must be max(labelList[i])

		# go over all texts
		for (batchElement, text) in enumerate(texts):
			# convert to string of label (i.e. class-ids)
			labelStr = [charList.index(c) for c in text]
			# sparse tensor must have size of max. label-string
			if len(labelStr) > shape[1]:
				shape[1] = len(labelStr)
			# put each label into sparse tensor
			for (i, label) in enumerate(labelStr):
				indices.append([batchElement, i])
				values.append(label)

		return (indices, values, shape)


In [13]:
from SimpleHTR.src.SamplePreprocessor import preprocess
import cv2

gtTexts = [loader.samples[i].gtText for i in range(len(loader.samples))]
indices, values, dense_shape = toSparse(gtTexts[:int(len(gtTexts)*.95)], loader.charList)
dense_shape[1] = 32
y_train = tf.sparse.SparseTensor(indices, values, dense_shape)

imgs_train = [preprocess(cv2.imread(loader.samples[i].filePath, cv2.IMREAD_GRAYSCALE), (128,32), False) for i in range(int(len(gtTexts)*.95))]

In [23]:
y_train[0]

TypeError: 'SparseTensor' object is not subscriptable

In [14]:
imgs_train = np.asarray(imgs_train)
imgs_train = imgs_train.reshape((imgs_train.shape[0], imgs_train.shape[1], imgs_train.shape[2], 1))

tf.convert_to_tensor(imgs_train)

<tf.Tensor: shape=(23750, 128, 32, 1), dtype=float64, numpy=
array([[[[ 0.35059005],
         [ 0.39148473],
         [ 0.12566928],
         ...,
         [ 0.12566928],
         [ 0.35059005],
         [ 0.39148473]],

        [[ 0.37103739],
         [ 0.39148473],
         [ 0.33014271],
         ...,
         [ 0.12566928],
         [-0.05835679],
         [-0.1605935 ]],

        [[ 0.39148473],
         [ 0.02343257],
         [ 0.39148473],
         ...,
         [-0.05835679],
         [ 0.12566928],
         [ 0.35059005]],

        ...,

        [[ 0.39148473],
         [ 0.39148473],
         [ 0.39148473],
         ...,
         [ 0.39148473],
         [ 0.39148473],
         [ 0.39148473]],

        [[ 0.39148473],
         [ 0.39148473],
         [ 0.39148473],
         ...,
         [ 0.39148473],
         [ 0.39148473],
         [ 0.39148473]],

        [[ 0.39148473],
         [ 0.39148473],
         [ 0.39148473],
         ...,
         [ 0.39148473],
         [ 0.39

In [25]:
model.fit(
    x=imgs_train,
    y=y_train,
    epochs=1,
    verbose=1
)

Y Prediction:  (None, 32, 80)


ValueError: in user code:

    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\training.py:541 train_step  **
        self.trainable_variables)
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\training.py:1804 _minimize
        trainable_variables))
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:521 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    C:\Users\Joshua\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:1219 _filter_grads
        ([v.name for _, v in grads_and_vars],))

    ValueError: No gradients provided for any variable: ['conv2d_36/kernel:0', 'conv2d_36/bias:0', 'conv2d_37/kernel:0', 'conv2d_37/bias:0', 'conv2d_38/kernel:0', 'conv2d_38/bias:0', 'conv2d_39/kernel:0', 'conv2d_39/bias:0', 'conv2d_40/kernel:0', 'conv2d_40/bias:0', 'bidirectional_6/forward_lstm_6/lstm_cell_19/kernel:0', 'bidirectional_6/forward_lstm_6/lstm_cell_19/recurrent_kernel:0', 'bidirectional_6/forward_lstm_6/lstm_cell_19/bias:0', 'bidirectional_6/backward_lstm_6/lstm_cell_20/kernel:0', 'bidirectional_6/backward_lstm_6/lstm_cell_20/recurrent_kernel:0', 'bidirectional_6/backward_lstm_6/lstm_cell_20/bias:0', 'conv2d_41/kernel:0', 'conv2d_41/bias:0'].
