In [6]:
# !wget -q https://github.com/sayakpaul/Handwriting-Recognizer-in-Keras/releases/download/v1.0.0/IAM_Words.zip
# !unzip -qq IAM_Words.zip
# !
# !mkdir data
# !mkdir data/words
# !tar -xf IAM_Words/words.tgz -C data/words
# !mv IAM_Words/words.txt data


In [7]:
# !head -20 data/words.txt

In [8]:
# !export MLFLOW_TRACKING_URI=http://localhost:8080

In [1]:
import keras
from keras.layers import StringLookup
from keras import ops
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
import mlflow
from keras.saving import register_keras_serializable
import logging
logging.basicConfig(filename='log.log', filemode='w', encoding='utf-8', level=logging.INFO,format='%(asctime)s - %(levelname)s - %(message)s')

np.random.seed(42)
keras.utils.set_random_seed(42)
logging.info("Initialized logging and set up random seeds")

2025-04-12 13:33:27.860345: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-12 13:33:27.881333: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744445007.904849   57369 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744445007.911615   57369 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744445007.934290   57369 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
try:
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    mlflow.set_experiment("handwriting-recognition")
    logging.info("MLFlow server set up at http://127.0.0.1:8080 and experiment initialized")
except Exception as e:
    logging.exception(f"Failed to set up MLFlow server : {e}")

In [3]:
base_path = "data"
words_list = []

try:
    words = open(f"{base_path}/words.txt", "r").readlines()
    for line in words:
        if line[0] == "#":
            continue
        if line.split(" ")[1] != "err":  # We don't need to deal with errored entries.
            words_list.append(line)
    logging.info("Successfully read label file")
except Exception as e:
    logging.exception(f"Unable to open label file : {e}")

np.random.shuffle(words_list)

In [4]:
split_idx = int(0.9 * len(words_list))
train_samples = words_list[:split_idx]
test_samples = words_list[split_idx:]

val_split_idx = int(0.5 * len(test_samples))
validation_samples = test_samples[:val_split_idx]
test_samples = test_samples[val_split_idx:]

assert len(words_list) == len(train_samples) + len(validation_samples) + len(
    test_samples
)
logging.info("Successfully split train-valid-test=0.9:0.05:0.05 labels")
logging.info(f"Original dataset has {len(train_samples)+len(validation_samples)+len(test_samples)} datapoints")


In [5]:
base_image_path = os.path.join(base_path, "words")


def get_image_paths_and_labels(samples):
    paths = []
    corrected_samples = []
    for i, file_line in enumerate(samples):
        line_split = file_line.strip()
        line_split = line_split.split(" ")

        # Each line split will have this format for the corresponding image:
        # part1/part1-part2/part1-part2-part3.png
        image_name = line_split[0]
        partI = image_name.split("-")[0]
        partII = image_name.split("-")[1]
        img_path = os.path.join(
            base_image_path, partI, partI + "-" + partII, image_name + ".png"
        )
        try:
            if os.path.getsize(img_path):
                paths.append(img_path)
                corrected_samples.append(file_line.split("\n")[0])
        except Exception as e:
            logging.exception(f"{img_path} is corrupt or does not exist : {e}")

    return paths, corrected_samples


train_img_paths, train_labels = get_image_paths_and_labels(train_samples)
validation_img_paths, validation_labels = get_image_paths_and_labels(validation_samples)
test_img_paths, test_labels = get_image_paths_and_labels(test_samples)
logging.info(f"Corrected dataset has {len(train_labels)+len(validation_labels)+len(test_labels)} datapoints")


In [6]:
# Find maximum length and the size of the vocabulary in the training data.
train_labels_cleaned = []
characters = set()
max_len = 0

for label in train_labels:
    label = label.split(" ")[-1].strip()
    for char in label:
        characters.add(char)

    max_len = max(max_len, len(label))
    train_labels_cleaned.append(label)

characters = sorted(list(characters))

logging.info(f"Maximum length: {max_len}")
logging.info(f"Vocab size: {len(characters)}")


In [7]:
def clean_labels(labels):
    cleaned_labels = []
    for label in labels:
        label = label.split(" ")[-1].strip()
        cleaned_labels.append(label)
    return cleaned_labels


validation_labels_cleaned = clean_labels(validation_labels)
test_labels_cleaned = clean_labels(test_labels)
logging.info("Successfully cleaned all labels")

In [8]:
AUTOTUNE = tf.data.AUTOTUNE

# Mapping characters to integers.
char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to original characters.
num_to_char = StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)

logging.info("Successfully initialized lookup tables for character to number conversion")


I0000 00:00:1744445029.125647   57369 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1747 MB memory:  -> device: 0, name: NVIDIA GeForce MX230, pci bus id: 0000:01:00.0, compute capability: 6.1


In [9]:
def distortion_free_resize(image, img_size):
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)

    # Check tha amount of padding needed to be done.
    pad_height = h - ops.shape(image)[0]
    pad_width = w - ops.shape(image)[1]

    # Only necessary if you want to do same amount of padding on both sides.
    if pad_height % 2 != 0:
        height = pad_height // 2
        pad_height_top = height + 1
        pad_height_bottom = height
    else:
        pad_height_top = pad_height_bottom = pad_height // 2

    if pad_width % 2 != 0:
        width = pad_width // 2
        pad_width_left = width + 1
        pad_width_right = width
    else:
        pad_width_left = pad_width_right = pad_width // 2

    image = tf.pad(
        image,
        paddings=[
            [pad_height_top, pad_height_bottom],
            [pad_width_left, pad_width_right],
            [0, 0],
        ],
    )

    image = ops.transpose(image, (1, 0, 2))
    image = tf.image.flip_left_right(image)
    return image


In [10]:
batch_size = 64
padding_token = 99
image_width = 128
image_height = 32



def preprocess_image(image_path, img_size=(image_width, image_height)):
    try:
        image = tf.io.read_file(image_path)
        image = tf.image.decode_png(image, 1)
        image = distortion_free_resize(image, img_size)
        image = ops.cast(image, tf.float32) / 255.0
    except Exception as e:
        logging.exception(f"Error processing {image_path} : {e}")
    return image


def vectorize_label(label):
    try:
        label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
        length = ops.shape(label)[0]
        pad_amount = max_len - length
        label = tf.pad(label, paddings=[[0, pad_amount]], constant_values=padding_token)
    except Exception as e:
        logging.exception(f"Error processing {label} : {e}")
    return label


def process_images_labels(image_path, label):
    image = preprocess_image(image_path)
    label = vectorize_label(label)
    return {"image": image, "label": label}


def prepare_dataset(image_paths, labels):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels)).map(
        process_images_labels, num_parallel_calls=AUTOTUNE
    )
    return dataset.batch(batch_size).cache().prefetch(AUTOTUNE)


In [11]:
try:
    train_ds = prepare_dataset(train_img_paths[:100], train_labels_cleaned[:100])
    validation_ds = prepare_dataset(validation_img_paths[:100], validation_labels_cleaned[:100])
    test_ds = prepare_dataset(test_img_paths[:100], test_labels_cleaned[:100])
    logging.info("Successfully created train, valid, test datasets")
except Exception as e:
    logging.exception(f"Error preparing datasets : {e}")

In [12]:
@register_keras_serializable()
class CTCLayer(keras.layers.Layer):
    def __init__(self, name=None,**kwargs):
        super().__init__(name=name,**kwargs)
        self.loss_fn = tf.keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        batch_len = ops.cast(ops.shape(y_true)[0], dtype="int64")
        input_length = ops.cast(ops.shape(y_pred)[1], dtype="int64")
        label_length = ops.cast(ops.shape(y_true)[1], dtype="int64")

        input_length = input_length * ops.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * ops.ones(shape=(batch_len, 1), dtype="int64")
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred


def build_model():
    # Inputs to the model
    input_img = keras.Input(shape=(image_width, image_height, 1), name="image")
    labels = keras.layers.Input(name="label", shape=(None,))

    # First conv block.
    x = keras.layers.Conv2D(
        32,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv1",
    )(input_img)
    x = keras.layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block.
    x = keras.layers.Conv2D(
        64,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv2",
    )(x)
    x = keras.layers.MaxPooling2D((2, 2), name="pool2")(x)

    # We have used two max pool with pool size and strides 2.
    # Hence, downsampled feature maps are 4x smaller. The number of
    # filters in the last layer is 64. Reshape accordingly before
    # passing the output to the RNN part of the model.
    new_shape = ((image_width // 4), (image_height // 4) * 64)
    x = keras.layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = keras.layers.Dense(64, activation="relu", name="dense1")(x)
    x = keras.layers.Dropout(0.2)(x)

    # RNNs.
    x = keras.layers.Bidirectional(
        keras.layers.LSTM(128, return_sequences=True, dropout=0.25)
    )(x)
    x = keras.layers.Bidirectional(
        keras.layers.LSTM(64, return_sequences=True, dropout=0.25)
    )(x)

    # +2 is to account for the two special tokens introduced by the CTC loss.
    # The recommendation comes here: https://git.io/J0eXP.
    x = keras.layers.Dense(
        len(char_to_num.get_vocabulary()) + 2, activation="softmax", name="dense2"
    )(x)

    # Add CTC layer for calculating CTC loss at each step.
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model.
    model = keras.models.Model(
        inputs=[input_img, labels], outputs=output, name="handwriting_recognizer"
    )
    # Optimizer.
    opt = keras.optimizers.Adam()
    # Compile the model and return.
    model.compile(optimizer=opt)
    return model


# Get the model.
try:
    model = build_model()
    logging.info(f"Model successfully built")
except Exception as e:
    logging.exception(f"Model build error : {e}")


In [13]:
validation_images = []
validation_labels = []

for batch in validation_ds:
    validation_images.append(batch["image"])
    validation_labels.append(batch["label"])


2025-04-12 13:33:58.845739: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [14]:
def calculate_edit_distance(labels, predictions):
    # Get a single batch and convert its labels to sparse tensors.
    sparse_labels = ops.cast(tf.sparse.from_dense(labels), dtype=tf.int64)

    # Make predictions and convert them to sparse tensors.
    input_len = np.ones(predictions.shape[0]) * predictions.shape[1]
    predictions_decoded = keras.ops.nn.ctc_decode(
        predictions, sequence_lengths=input_len
    )[0][0][:, :max_len]
    sparse_predictions = ops.cast(
        tf.sparse.from_dense(predictions_decoded), dtype=tf.int64
    )

    # Compute individual edit distances and average them out.
    edit_distances = tf.edit_distance(
        sparse_predictions, sparse_labels, normalize=False
    )
    return tf.reduce_mean(edit_distances)


class MLFlowMetricCallback(keras.callbacks.Callback):
    def __init__(self, pred_model):
        super().__init__()
        self.prediction_model = pred_model

    def on_epoch_end(self, epoch, logs=None):
        edit_distances = []

        for i in range(len(validation_images)):
            labels = validation_labels[i]
            predictions = self.prediction_model.predict(validation_images[i])
            edit_distances.append(calculate_edit_distance(labels, predictions).numpy())
        logs["avg_edit_distance"] = np.mean(edit_distances)
        try:
            for k,v in logs.items():
                mlflow.log_metric(f"{k}", v, step=epoch)
            logging.info("Successfully logged metrics in the server")
        except Exception as e:
            logging.exception(f"Unable to log metrics in the server : {e}")


In [15]:
def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.ops.nn.ctc_decode(pred, sequence_lengths=input_len)[0][0][
        :, :max_len
    ]
    # Iterate over the results and get back the text.
    output_text = []
    for res in results:
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = (
            tf.strings.reduce_join(num_to_char(res))
            .numpy()
            .decode("utf-8")
            .replace("[UNK]", "")
        )
        output_text.append(res)
    return output_text

In [22]:
@register_keras_serializable()
class PredictionModelWithDecode(keras.Model):
    def __init__(self, original_model, **kwargs):
        super(PredictionModelWithDecode, self).__init__(**kwargs)
        self.original_model = original_model

    def call(self, inputs):
        # Get predictions from the original model
        pred = self.original_model(inputs)
        
        # Decode predictions (e.g., use CTC decode)
        decoded_predictions = decode_batch_predictions(pred)
        return decoded_predictions
    
    def get_config(self):
        config = super(PredictionModelWithDecode, self).get_config()
        config.update({"original_model": self.original_model.get_config()})
        return config

    # Define `from_config` method for loading
    @classmethod
    def from_config(cls, config):
        original_model_config = config.pop("original_model")
        original_model = keras.Model.from_config(original_model_config)
        return cls(original_model=original_model, **config)

In [None]:
epochs = 10  # To get good results this should be at least 50.
model = build_model()
prediction_model = keras.models.Model(
    model.get_layer(name="image").output, model.get_layer(name="dense2").output
)


custom_metric_callback = MLFlowMetricCallback(prediction_model)
prediction_model_with_decode = PredictionModelWithDecode(prediction_model)

# Train the model.
with mlflow.start_run(run_name='bisleri') as run:
    mlflow.log_param("batch_size",batch_size)
    mlflow.log_param("padding_token",padding_token)
    mlflow.log_param("image_width",image_width)
    mlflow.log_param("image_height",image_height)
    mlflow.log_param("epochs",epochs)
    history = model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=epochs,
        callbacks=[custom_metric_callback],
    )
    mlflow.tensorflow.log_model(prediction_model_with_decode, "models")

logging.info("Succesfully finished training and logging")


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 7262.86
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 463ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - loss: 7031.0547 - val_loss: 4839.1616 - avg_edit_distance: 21.0000
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 5655.746
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 456ms/step - loss: 5460.2974 - val_loss: 3107.1682 - avg_edit_distance: 21.0000
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 3863.5151
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 451ms/step - loss: 3721.2073 - val_loss: 1908.4198 - avg_edit_distance: 21.0000
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



🏃 View run bisleri at: http://127.0.0.1:8080/#/experiments/754929904376585580/runs/f2baf64de38f416295f5aa5b850e829f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/754929904376585580


In [7]:
import mlflow.tensorflow
mlflow.set_tracking_uri("http://127.0.0.1:8080")
model_path = "models:/tester/8"
model = mlflow.tensorflow.load_model(model_path)

In [10]:
model.metadata

AttributeError: 'PredictionModelWithDecode' object has no attribute 'metadata'

In [2]:


#  Let's check results on some test samples.
for batch in test_ds.take(1):
    batch_images = batch["image"]
    _, ax = plt.subplots(4, 4, figsize=(15, 8))

    pred_texts= prediction_model.predict(batch_images)
    # pred_texts = decode_batch_predictions(preds)
    print(pred_texts)

    for i in range(16):
        img = batch_images[i]
        img = tf.image.flip_left_right(img)
        img = ops.transpose(img, (1, 0, 2))
        img = (img * 255.0).numpy().clip(0, 255).astype(np.uint8)
        img = img[:, :, 0]

        title = f"Prediction: {pred_texts[i]}"
        ax[i // 4, i % 4].imshow(img, cmap="gray")
        ax[i // 4, i % 4].set_title(title)
        ax[i // 4, i % 4].axis("off")

plt.show()

NameError: name 'test_ds' is not defined

In [4]:
import requests
import numpy as np
import keras
from keras import ops
image_width = 128
image_height = 32
# Prepare input image (flattened)
image = np.random.rand(1, 128, 32, 1).astype("float32")  # Replace with real image preprocessing
input_list = image.tolist()  

# Construct request JSON payload using dataframe_split
payload = {
    "inputs": input_list # or use "dataframe_split"
}

# Send POST request to MLflow model server
response = requests.post(
    "http://127.0.0.1:8000/invocations",
    headers={"Content-Type": "application/json"},
    json=payload
)

out = response.json()


In [6]:
out

{'error_code': 'BAD_REQUEST',
 'message': 'Encountered an unexpected error while evaluating the model. Verify that the serialized input Dataframe is compatible with the model for inference.',
 'stack_trace': 'Traceback (most recent call last):\n  File "/home/jayagowtham/Documents/MLOps/A8/.a8_env/lib/python3.10/site-packages/mlflow/pyfunc/scoring_server/__init__.py", line 369, in invocations\n    raw_predictions = model.predict(data, params=params)\n  File "/home/jayagowtham/Documents/MLOps/A8/.a8_env/lib/python3.10/site-packages/mlflow/pyfunc/__init__.py", line 804, in predict\n    return self._predict(data, params)\n  File "/home/jayagowtham/Documents/MLOps/A8/.a8_env/lib/python3.10/site-packages/mlflow/pyfunc/__init__.py", line 854, in _predict\n    return self._predict_fn(data, params=params)\n  File "/home/jayagowtham/Documents/MLOps/A8/.a8_env/lib/python3.10/site-packages/mlflow/tensorflow/__init__.py", line 901, in predict\n    return self.keras_model.predict(data)\n  File "/hom

In [11]:
decode_batch_predictions(np.array(out))

I0000 00:00:1744444623.984704   56492 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 184 MB memory:  -> device: 0, name: NVIDIA GeForce MX230, pci bus id: 0000:01:00.0, compute capability: 6.1
I0000 00:00:1744444624.014464   56492 cuda_executor.cc:479] failed to allocate 184.12MiB (193069056 bytes) from device: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory


NameError: name 'max_len' is not defined