Implement a simplified Optical Character Recognition (OCR) pipeline using a CNN and CTC (Connectionist Temporal Classification) loss for sequence prediction of handwritten words (e.g., from IAM or synthetic datasets).

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [2]:
# Simulate a small batch of synthetic "word images" with integer-encoded labels.
SEED = 42
rng = np.random.default_rng(SEED)  # seeded so Restart & Run All regenerates the same batch

batch_size = 16
img_width, img_height = 100, 32
# 26 letters + space + CTC blank. ctc_batch_cost treats the LAST class index
# (num_classes - 1 = 27) as the blank, so real labels must stay below it.
num_classes = 26 + 1 + 1

# Images are stored width-first so the width axis can later act as the CTC time axis.
X_data = rng.random((batch_size, img_width, img_height, 1), dtype=np.float32)
# The upper bound is exclusive: labels are drawn from 1..26 and never hit the blank (27).
# int32 matches the dtype declared by the model's `labels` input.
y_data = rng.integers(1, 27, size=(batch_size, 10), dtype=np.int32)

In [3]:
# CTC needs two lengths per sample, each passed as a (batch, 1) int32 column:
# the number of time steps in the prediction and the number of symbols in the label.
# The two 2x2 max-pools in the CNN shrink the width (time) axis by a factor of 4.
input_lengths = np.full((batch_size, 1), img_width // 4, dtype=np.int32)
# Read the label length off y_data instead of repeating the literal, so the two
# cannot drift apart when the simulated label width changes.
label_lengths = np.full((batch_size, 1), y_data.shape[1], dtype=np.int32)

In [4]:
# Convolutional feature extractor for the CNN + CTC model: two conv / 2x2 max-pool
# stages. Each pooling step halves both spatial axes, leaving width and height at 1/4.
input_img = keras.Input(name='input_img', shape=(img_width, img_height, 1))

x = keras.layers.Conv2D(
    filters=32, kernel_size=(3, 3), padding='same', activation='relu'
)(input_img)
x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)

x = keras.layers.Conv2D(
    filters=64, kernel_size=(3, 3), padding='same', activation='relu'
)(x)
x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)

In [5]:
# Fold (height, channels) into one feature vector per width position so the feature
# map becomes a (time_steps, features) sequence for the recurrent layer.
# Width and height are each 1/4 of the input after the two pools; 64 is the filter
# count of the last Conv2D.
new_shape = (img_width // 4, 64 * (img_height // 4))
x = keras.layers.Reshape(new_shape)(x)

In [6]:
# Sequence modelling: a bidirectional LSTM that returns its output at every time step.
x = keras.layers.Bidirectional(
    layer=keras.layers.LSTM(units=128, return_sequences=True)
)(x)
# Softmax over the character classes at each time step (not each layer).
x = keras.layers.Dense(units=num_classes, activation='softmax')(x)

In [7]:
# Extra model inputs consumed by the CTC loss layer: the integer label sequence
# (variable length) plus one length value per sample for predictions and labels.
labels = keras.Input(name='labels', shape=(None,), dtype='int32')
input_len = keras.Input(name='input_length', shape=(1,), dtype='int32')
label_len = keras.Input(name='label_length', shape=(1,), dtype='int32')

In [17]:
# CTC loss function, meant to be wrapped in a Lambda layer.
def ctc_lambda_func(args):
    """Return the batch CTC loss for the tensors packed in `args`.

    `args` must unpack into exactly four items, in this order:
    (y_pred, labels, input_len, label_len).

    `ctc_batch_cost` expects the labels first and the predictions second,
    so the first two items are swapped when they are forwarded.
    """
    predictions, targets, pred_lengths, target_lengths = args
    return tf.keras.backend.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_lengths,
        label_length=target_lengths,
    )

# Wrap the loss function in a Lambda layer so the CTC loss becomes a model output.
loss_out = keras.layers.Lambda(
    function=ctc_lambda_func,
    output_shape=(1,),
    name='ctc',
)([x, labels, input_len, label_len])

In [18]:
# Training model: takes the image plus the CTC auxiliary inputs and outputs the loss.
model = keras.Model(
    inputs=[input_img, labels, input_len, label_len],
    outputs=[loss_out],
)
# The 'ctc' layer already emits the loss value, so the compiled loss simply passes
# the model output through and ignores y_true.
model.compile(
    loss={'ctc': lambda y_true, y_pred: y_pred},
    optimizer='adam',
)

In [19]:
# Train for one epoch on the simulated batch. Capturing the return value keeps the
# History object (and its per-epoch loss) available to later cells, and stops its
# memory-address repr from being echoed as the cell output.
history = model.fit(
    x={
        'input_img': X_data,
        'labels': y_data,
        'input_length': input_lengths,
        'label_length': label_lengths,
    },
    # Placeholder targets: the compiled loss returns y_pred and never reads y_true.
    y=np.zeros((batch_size, 1)),
    epochs=1,
)

1/1 ━━━━━━━━━━━━━━━━━━━━ 3s 3s/step - loss: 60.2050


<keras.src.callbacks.history.History at 0x2c6fb4d2730>

In [20]:
# Print a summary of the training model built above.
model.summary()