# Libraries

In [1]:
import os
import string
import cv2
import fnmatch
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, BatchNormalization, Lambda, Bidirectional, LSTM, Dense

# Preprocessing

# Model Architecture

### Model = CNN + RNN + CTC loss

In [7]:
# input with shape of height=32 and width=128 
inputs = Input(shape=(32,128,1))
 
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# poolig layer with kernel size (2,2)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# poolig layer with kernel size (2,1)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
 
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
 
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
 
act_model = Model(inputs, outputs)

In [9]:
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
 
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [38]:
type(labels)

tensorflow.python.framework.ops.Tensor

In [11]:
from tensorflow.keras.callbacks import ModelCheckpoint
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
 
filepath="best_model.hdf5"
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint]

In [40]:
training_img = np.array(training_img)
train_input_length = np.array(train_input_length)
train_label_length = np.array(train_label_length)

valid_img = np.array(valid_img)
valid_input_length = np.array(valid_input_length)
valid_label_length = np.array(valid_label_length)
 
model.fit(x=[training_img, train_padded_txt, train_input_length, train_label_length], y=np.zeros(13500), batch_size=256, epochs = 10, validation_data = ([valid_img, valid_padded_txt, valid_input_length, valid_label_length], [np.zeros(1500)]), verbose = 1, callbacks = callbacks_list)


Train on 13500 samples, validate on 1500 samples
Epoch 1/10
Epoch 00001: val_loss improved from inf to 28.39119, saving model to best_model.hdf5
Epoch 2/10
Epoch 00002: val_loss did not improve from 28.39119
Epoch 3/10
Epoch 00003: val_loss improved from 28.39119 to 27.69465, saving model to best_model.hdf5
Epoch 4/10
Epoch 00004: val_loss improved from 27.69465 to 26.97057, saving model to best_model.hdf5
Epoch 5/10
Epoch 00005: val_loss did not improve from 26.97057
Epoch 6/10
Epoch 00006: val_loss did not improve from 26.97057
Epoch 7/10
Epoch 00007: val_loss did not improve from 26.97057
Epoch 8/10
Epoch 00008: val_loss did not improve from 26.97057
Epoch 9/10
Epoch 00009: val_loss improved from 26.97057 to 25.70704, saving model to best_model.hdf5
Epoch 10/10
Epoch 00010: val_loss improved from 25.70704 to 25.36078, saving model to best_model.hdf5


<tensorflow.python.keras.callbacks.History at 0x7f6d30389dd0>

In [12]:
# load the saved best model weights
act_model.load_weights('best_model.hdf5')

# predict outputs on validation images
prediction = act_model.predict(valid_img)
 
# use CTC decoder
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])
 
# see the results
i = 0
for x in out:
    print(valid_orig_txt[i])
    for p in x:  
        if int(p) != -1:
            print(char_list[int(p)], end = '')       
    print('\n')
    i+=1

ValueError: Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (32, 128, 1)

In [None]:
valid_img