#### Importing libraries

In [2]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.backend as K


#### Loading the CSV files and preparing a vocabulary

In [6]:
# Dataset locations (images live in a sub-folder of the dataset root)
dataset_path = 'lm2LaTeX-100K'
images_path = os.path.join(dataset_path, 'processed_img')

# Read the train / validation / test splits, in that order
train_df, valid_df, test_df = (
    pd.read_csv(os.path.join(dataset_path, csv_name))
    for csv_name in ('im2latex_train.csv', 'im2latex_validate.csv', 'im2latex_test.csv')
)

# The test split contains rows with a missing formula; keep only labelled rows
test_df = test_df[test_df['formula'].notna()]

# Character-level vocabulary over every formula in the three splits
all_texts = pd.concat([split['formula'] for split in (train_df, valid_df, test_df)])
unique_chars = {symbol for formula in all_texts for symbol in formula}

# Indices follow the sorted character order, so the mapping is deterministic
ordered_chars = sorted(unique_chars)
char_to_num = dict(zip(ordered_chars, range(len(ordered_chars))))
num_to_char = dict(enumerate(ordered_chars))
vocab_size = len(char_to_num)

print(f"Vocabulary Size: {vocab_size}")
print(f"Example mapping: {list(char_to_num.items())[:10]}")


Vocabulary Size: 93
Example mapping: [(' ', 0), ('!', 1), ('"', 2), ('#', 3), ('&', 4), ("'", 5), ('(', 6), (')', 7), ('*', 8), ('+', 9)]


#### Defining a custom data generator class with CTC inputs

In [7]:
class CTCDataGenerator(Sequence):
    """Batch generator that feeds a CTC-trained model.

    Each batch is a ``(inputs, outputs)`` pair where ``inputs`` is a dict with
    the keys ``input_image``, ``labels``, ``input_length`` and ``label_length``
    and ``outputs`` is a dict with a dummy ``ctc`` target of zeros.

    Args:
        dataframe: DataFrame with one row per sample (image file name + label text).
        images_dir: Directory that the image file names are relative to.
        batch_size: Maximum number of samples per batch (the last batch may be smaller).
        img_height: Height every image is resized to.
        img_width: Width every image is resized to.
        datagen: Object exposing ``flow(array, batch_size=..., shuffle=...)``
            (e.g. an ``ImageDataGenerator``) used to rescale/augment the batch.
        max_label_length: Labels are truncated/padded to this many symbols.
        shuffle: Whether to reshuffle the sample order at the end of every epoch.
        filename_col: Column holding the image file name. ``None`` picks
            ``'filename'`` if present, otherwise ``'image'``.
        label_col: Column holding the label text. ``None`` picks ``'formula'``
            (the column the vocabulary is built from) if present, otherwise ``'latex'``.

    Note:
        Label encoding and the padding value rely on the module-level
        ``char_to_num`` mapping.
    """

    def __init__(self, dataframe, images_dir, batch_size, img_height, img_width, datagen,
                 max_label_length=100, shuffle=True, filename_col=None, label_col=None):
        # Newer Keras versions expect Sequence/PyDataset subclasses to call the base constructor.
        super().__init__()
        self.dataframe = dataframe
        self.images_dir = images_dir
        self.batch_size = batch_size
        self.img_height = img_height
        self.img_width = img_width
        self.datagen = datagen
        self.shuffle = shuffle
        self.indices = np.arange(len(self.dataframe))
        self.max_label_length = max_label_length
        # NOTE(review): 'image' is a guess at the dataset's file-name column — confirm against the CSV.
        self.filename_col = self._pick_column(dataframe, filename_col, ('filename', 'image'))
        self.label_col = self._pick_column(dataframe, label_col, ('formula', 'latex'))
        self.on_epoch_end()

    @staticmethod
    def _pick_column(dataframe, requested, candidates):
        """Return ``requested`` if given, else the first of ``candidates`` found in ``dataframe``."""
        if requested is not None:
            return requested
        for name in candidates:
            if name in dataframe.columns:
                return name
        raise KeyError(
            f"None of the columns {candidates} found in dataframe; "
            f"available columns: {list(dataframe.columns)}"
        )

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.dataframe) / self.batch_size))

    def on_epoch_end(self):
        """Reshuffle the sample order when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def encode_label(self, text):
        """Map every character of ``text`` to its index in ``char_to_num``."""
        return [char_to_num[char] for char in text]

    def __getitem__(self, index):
        """Build batch number ``index`` and return it as ``(inputs, outputs)``."""
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_rows = self.dataframe.iloc[batch_indices]
        batch_filenames = batch_rows[self.filename_col].values
        batch_labels_text = batch_rows[self.label_col].values

        batch_images = []
        batch_labels = []
        label_lengths = []

        for filename, label_text in zip(batch_filenames, batch_labels_text):
            img_path = os.path.join(self.images_dir, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable/missing file by returning None.
                raise FileNotFoundError(f"Could not read image: {img_path}")
            # Use the generator's own target size, not notebook-level globals.
            img = cv2.resize(img, (self.img_width, self.img_height))
            img = np.expand_dims(img, axis=-1)
            batch_images.append(img)

            # Truncate here so the reported label length always matches what is
            # actually kept in the padded label matrix.
            encoded_label = self.encode_label(label_text)[:self.max_label_length]
            batch_labels.append(encoded_label)
            label_lengths.append(len(encoded_label))

        # The final batch of an epoch can hold fewer than batch_size samples.
        actual_batch_size = len(batch_images)

        batch_images = np.array(batch_images)
        # Builtin next() works whether or not the iterator exposes a .next() method.
        batch_images = next(self.datagen.flow(batch_images, batch_size=self.batch_size, shuffle=False))

        batch_labels_padded = tf.keras.preprocessing.sequence.pad_sequences(
            batch_labels,
            maxlen=self.max_label_length,
            padding='post',
            truncating='post',
            value=len(char_to_num)  # pad with the index one past the vocabulary
        )

        # NOTE(review): CTC needs label_length <= input_length; with the default
        # max_label_length this can exceed feature_width — confirm the settings.
        feature_width = self.img_width // 4  # adjust according to CNN pooling
        input_lengths = np.full((actual_batch_size, 1), feature_width, dtype=np.int64)
        label_lengths = np.array(label_lengths, dtype=np.int64).reshape(-1, 1)

        inputs = {
            'input_image': batch_images,
            'labels': batch_labels_padded,
            'input_length': input_lengths,
            'label_length': label_lengths
        }
        # Dummy target: the model's 'ctc' output already is the loss value.
        outputs = {'ctc': np.zeros([actual_batch_size])}

        return inputs, outputs


#### Setting up data augmentation

In [10]:
# Batching and input geometry shared by both generators
batch_size = 32
img_height = 192
img_width = 192

# Training pipeline: scale pixel values by 1/255 and apply small shifts / zoom
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)

# Validation pipeline: scaling only, no augmentation
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Generator instances
train_generator = CTCDataGenerator(
    dataframe=train_df,
    images_dir=images_path,
    batch_size=batch_size,
    img_height=img_height,
    img_width=img_width,
    datagen=train_datagen,
)
valid_generator = CTCDataGenerator(
    dataframe=valid_df,
    images_dir=images_path,
    batch_size=batch_size,
    img_height=img_height,
    img_width=img_width,
    datagen=valid_datagen,
)


#### Building the CRNN model with CTC Loss

In [11]:
def ctc_lambda_func(args):
    """Compute the per-sample CTC cost for use inside a ``Lambda`` layer.

    Args:
        args: Four-element sequence ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` called with the labels first and the
        predictions second, followed by the two length tensors.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

# Inputs: the image plus the three auxiliary tensors the CTC loss needs
input_image = layers.Input(shape=(img_height, img_width, 1), name='input_image')
labels = layers.Input(name='labels', shape=[None], dtype='float32')
input_length = layers.Input(name='input_length', shape=[1], dtype='int64')
label_length = layers.Input(name='label_length', shape=[1], dtype='int64')

# CNN: two conv + 2x2 max-pool stages -> feature map (img_height/4, img_width/4, 64)
x = layers.Conv2D(32, (3,3), activation='relu', padding='same')(input_image)
x = layers.MaxPooling2D(pool_size=(2,2))(x)
x = layers.Conv2D(64, (3,3), activation='relu', padding='same')(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

# Reshape: put the width axis first so each time step is one image column.
# Without the transpose, Reshape would walk the feature map row by row and the
# sequence axis would follow image height instead of width.
x = layers.Permute((2, 1, 3))(x)
new_shape = (img_width // 4, (img_height // 4) * 64)
x = layers.Reshape(target_shape=new_shape)(x)

# RNN
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)

# Output layer: one class per vocabulary character plus one extra for the CTC blank
x = layers.Dense(vocab_size + 1, activation='softmax')(x)

# CTC loss computed inside the graph by a Lambda layer named 'ctc'
ctc_loss = layers.Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])

# Model
model = models.Model(inputs=[input_image, labels, input_length, label_length], outputs=ctc_loss)

# The 'ctc' output already *is* the loss value, so the compiled loss simply
# passes it through; the dummy targets supplied by the generator are ignored.
model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
model.summary()


2025-04-26 18:54:13.543778: W tensorflow/core/framework/op_kernel.cc:1829] UNKNOWN: JIT compilation failed.
2025-04-26 18:54:13.543819: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: UNKNOWN: JIT compilation failed.


UnknownError: {{function_node __wrapped__Sign_device_/job:localhost/replica:0/task:0/device:GPU:0}} JIT compilation failed. [Op:Sign] name: 