In [1]:
import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Conv1D, Conv2D, MaxPooling2D, Activation, Reshape, Bidirectional, LSTM, Dense, Lambda, Layer, Dropout
from tensorflow.keras.optimizers import Adam
%run data_loader_final.ipynb
%matplotlib inline
np.random.seed(1)

[56, 12, 67, 12, 0, 68, 57, 66, 59, 73, 61, 66]
[[56 12 67 12  0 68 57 66 59 73 61 66 79 79 79 79 79 79 79 79 79 79 79 79
  79 79 79 79 79 79 79 79]]
(array([[[251, 251, 250, ..., 255, 255, 255],
        [250, 249, 250, ..., 255, 255, 255],
        [250, 251, 249, ..., 255, 255, 255],
        ...,
        [246, 248, 245, ..., 255, 255, 255],
        [247, 248, 246, ..., 255, 255, 255],
        [249, 248, 246, ..., 255, 255, 255]],

       [[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [255, 255, 244, ..., 255, 255, 255],
        [255, 255, 246, ..., 255, 255, 255],
        [255, 255, 249, ..., 255, 255, 255]],

       [[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [154, 191, 247, ..., 255, 255, 255],
        [151, 203, 254, ..., 255, 255, 255],
        [213, 238, 253, ..., 255, 255, 25

In [2]:
X_train, X_test, Y_train, Y_test = data_loader('words_trial', 'words_trial.txt') 
X_train = X_train/255
X_train = np.reshape(X_train, (-1, 32, 128, 1))
X_test = X_test/255
#print(np.shape(X_train))
#print(Y_train[1])

In [3]:
class CTCLayer(Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = tf.keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        return y_pred


In [7]:
def build_model():

    input_img = Input(shape=(32, 128, 1), name="image", dtype="float32")
    labels =  Input(name="label", shape=(None,), dtype="float32")

    conv1 = Conv2D(16, (5, 5),activation="relu",kernel_initializer="he_normal", padding="same", name="Conv1")(input_img)
    pool1 = MaxPooling2D((2, 2), name="pool1")(conv1)


    conv2 = Conv2D(32, (5, 5), activation="relu", kernel_initializer="he_normal", padding="same", name="Conv2")(pool1)
    pool2 = MaxPooling2D((2, 2), name="pool2")(conv2)
    
    conv3 = Conv2D(64, (3, 3), activation="relu", kernel_initializer="he_normal", padding="same", name="Conv3")(pool2)
    pool3 = MaxPooling2D((2, 1), name="pool3")(conv3)
    
    conv4 = Conv2D(128, (3, 3), activation="relu", kernel_initializer="he_normal", padding="same", name="Conv4")(pool3)
    pool4 = MaxPooling2D((2, 1), name="pool4")(conv4)
    
    conv5 = Conv2D(256, (3, 3), activation="relu", kernel_initializer="he_normal", padding="same", name="Conv5")(pool4)
    pool5 = MaxPooling2D((2, 1), name="pool5")(conv5)
    
    reshape = Reshape(target_shape = (32, 256), name="reshape")(pool5)
    dropout_layer = Dropout(0.2)(reshape)

    blstm1 = Bidirectional( tf.keras.layers.LSTM(256, return_sequences=True, dropout=0.25))(dropout_layer)
    blstm2 = Bidirectional( tf.keras.layers.LSTM(256, return_sequences=True, dropout=0.25))(blstm1)
    
    dense = Dense(80, activation="softmax", name="dense1")(blstm2)

    output = CTCLayer(name="ctc_loss")(labels, dense)

    model =  Model(inputs=[input_img, labels], outputs=output, name="htr_model_v1")
    
    opt = Adam()

    model.compile(optimizer=opt)
    return model


In [8]:
model = build_model()
model.summary()

Model: "htr_model_v1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              [(None, 32, 128, 1)] 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 32, 128, 16)  416         image[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 16, 64, 16)   0           Conv1[0][0]                      
__________________________________________________________________________________________________
Conv2 (Conv2D)                  (None, 16, 64, 32)   12832       pool1[0][0]                      
_______________________________________________________________________________________

In [9]:
history = model.fit(x = [X_train, Y_train], epochs = 10, batch_size = 10, verbose = 1)

Train on 177 samples
Epoch 1/10


ValueError: operands could not be broadcast together with shapes (10,1) (7,1) (10,1) 