In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.keras.layers.experimental.preprocessing import StringLookup
from tensorflow import keras
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, Bidirectional, LSTM, GRU, Dense
from tensorflow.keras.layers import Dropout, BatchNormalization, LeakyReLU, PReLU, Layer
from tensorflow.keras.layers import Input, Add, Activation, Lambda, MaxPooling2D, Reshape
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.constraints import MaxNorm
from tensorflow import keras
# Seed NumPy (used for the dataset shuffle below) and TensorFlow (weight
# initialisation, dropout) so that runs are as repeatable as the backend allows.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
base_path = "../input/iam-words/IAM_Words"
words_list = []

# Read the annotation file inside a context manager so the handle is closed
# as soon as the lines are in memory.
with open(f"{base_path}/words.txt", "r") as words_file:
    words = words_file.readlines()

for line in words:
    # Lines starting with "#" are header/comment lines of the annotation file.
    if line[0] == "#":
        continue
    # The second space-separated field flags errored entries; keep the rest.
    if line.split(" ")[1] != "err":
        words_list.append(line)

print(len(words_list))

# In-place shuffle driven by the NumPy seed set in the first cell.
np.random.shuffle(words_list)

In [None]:
# 90 % of the shuffled annotations go to training; the remaining 10 % is cut
# in half to form the validation and test sets.
split_idx = int(0.9 * len(words_list))
train_samples = words_list[:split_idx]
held_out = words_list[split_idx:]

val_split_idx = int(0.5 * len(held_out))
validation_samples, test_samples = held_out[:val_split_idx], held_out[val_split_idx:]

# Sanity check: the three splits must cover every annotation exactly once.
total_assigned = len(train_samples) + len(validation_samples) + len(test_samples)
assert len(words_list) == total_assigned

print(f"Total training samples: {len(train_samples)}")
print(f"Total validation samples: {len(validation_samples)}")
print(f"Total test samples: {len(test_samples)}")

In [None]:
# Root directory of the word images; get_image_paths_and_labels builds every
# image path underneath it.
base_image_path = os.path.join(base_path, "words")


def get_image_paths_and_labels(samples, image_root=None):
    """Resolve annotation lines to image paths, dropping empty image files.

    Each annotation line starts with an image name of the form
    ``part1-part2-part3...``; the matching file is expected at
    ``<image_root>/part1/part1-part2/<image name>.png``.

    Args:
        samples: Iterable of annotation lines (strings).
        image_root: Directory containing the image tree. Defaults to the
            module-level ``base_image_path`` when omitted.

    Returns:
        A ``(paths, corrected_samples)`` tuple of equal-length lists: the
        image paths whose files are non-empty, and the corresponding
        annotation lines cut at their first newline.

    Raises:
        OSError: If an expected image file does not exist
            (raised by ``os.path.getsize``).
        IndexError: If an image name contains no ``-`` separator.
    """
    if image_root is None:
        image_root = base_image_path

    paths = []
    corrected_samples = []
    for file_line in samples:
        image_name = file_line.strip().split(" ")[0]
        name_parts = image_name.split("-")
        form_dir = name_parts[0] + "-" + name_parts[1]
        img_path = os.path.join(image_root, name_parts[0], form_dir, image_name + ".png")

        # A size of zero means the image file is empty; skip such samples.
        if os.path.getsize(img_path):
            paths.append(img_path)
            corrected_samples.append(file_line.split("\n")[0])

    return paths, corrected_samples


# Resolve each split to parallel lists: image paths and the annotation lines
# (newline removed) of the samples whose image file is non-empty.
train_img_paths, train_labels = get_image_paths_and_labels(train_samples)
validation_img_paths, validation_labels = get_image_paths_and_labels(validation_samples)
test_img_paths, test_labels = get_image_paths_and_labels(test_samples)

In [None]:
# Peek at one retained annotation line to see its field layout.
train_labels[0]

In [None]:
# Reduce every training annotation to its last space-separated field (the
# transcription) and record the length of the longest one.
# The character vocabulary is not derived here; `characters` is read from a
# token file in the next cell.
train_labels_cleaned = [entry.split(" ")[-1].strip() for entry in train_labels]
max_len = max((len(text) for text in train_labels_cleaned), default=0)


In [None]:
# Build the vocabulary from the first line of the token file: every single
# character on that line becomes one vocabulary entry.
# NOTE(review): if that line ends with a newline, "\n" also becomes an entry
# — confirm this is intended.
with open('../input/char-token/char_token.txt','r') as token:
    characters = list(token.readlines()[0])
len(characters)

In [None]:
def clean_labels(labels):
    """Return the last space-separated field of each entry, whitespace-stripped.

    Args:
        labels: Iterable of annotation strings.

    Returns:
        A list with one cleaned string per input entry, in the same order.
    """
    return [entry.split(" ")[-1].strip() for entry in labels]


# Apply the same cleaning to the validation and test annotations; the training
# annotations were already cleaned while computing max_len.
validation_labels_cleaned = clean_labels(validation_labels)
test_labels_cleaned = clean_labels(test_labels)

In [None]:
# Sentinel handed to the tf.data calls below so that parallelism and prefetch
# buffer sizes are tuned at runtime.
AUTOTUNE = tf.data.AUTOTUNE

# Forward lookup: character -> integer id.
char_to_num = StringLookup(mask_token=None, vocabulary=list(characters))
print(char_to_num.get_vocabulary())

# Inverse lookup: integer id -> character, built from the very same vocabulary.
num_to_char = StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=char_to_num.get_vocabulary(),
)




In [None]:
def distortion_free_resize(image, img_size):
    """Resize an image without distortion and pad it to the requested size.

    The image is scaled with its aspect ratio preserved so it fits inside
    ``img_size``, then padded to exactly that size. When the padding along an
    axis is odd, the extra pixel goes to the top / left. Finally the image is
    transposed so the width axis comes first and flipped left-to-right.

    Args:
        image: Image tensor with (height, width, channels) axes.
        img_size: ``(width, height)`` of the padded image before transposition.

    Returns:
        The padded, transposed and flipped image tensor.
    """
    target_w, target_h = img_size
    image = tf.image.resize(image, size=(target_h, target_w), preserve_aspect_ratio=True)

    # Amount of padding needed on each axis after the aspect-preserving resize.
    pad_height = target_h - tf.shape(image)[0]
    pad_width = target_w - tf.shape(image)[1]

    # The floored half goes to the bottom / right; whatever remains (one more
    # pixel when the total is odd) goes to the top / left.
    pad_height_bottom = pad_height // 2
    pad_height_top = pad_height - pad_height_bottom
    pad_width_right = pad_width // 2
    pad_width_left = pad_width - pad_width_right

    padded = tf.pad(
        image,
        paddings=[
            [pad_height_top, pad_height_bottom],
            [pad_width_left, pad_width_right],
            [0, 0],
        ],
    )

    transposed = tf.transpose(padded, perm=[1, 0, 2])
    return tf.image.flip_left_right(transposed)

In [None]:
# Number of samples per batch produced by prepare_dataset.
batch_size = 64
# Value used by vectorize_label to right-pad label id sequences up to max_len.
# NOTE(review): assumes no real character id equals 99 — confirm against the
# vocabulary size.
padding_token = 99
# Target image size handed to distortion_free_resize as (width, height).
image_width = 256
image_height = 64
def preprocess_image(image_path, img_size=(image_width, image_height)):
    """Load a PNG as a single-channel float image scaled by 1/255.

    Args:
        image_path: Path of the PNG file to read.
        img_size: ``(width, height)`` forwarded to ``distortion_free_resize``.

    Returns:
        The resized, padded image as a float32 tensor divided by 255.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, 1)  # 1 -> decode to one channel
    resized = distortion_free_resize(decoded, img_size)
    return tf.cast(resized, tf.float32) / 255.0

def vectorize_label(label):
    """Turn a transcription into a sequence of ids right-padded to ``max_len``.

    Args:
        label: Scalar string tensor holding the transcription.

    Returns:
        The id sequence produced by ``char_to_num``, padded at the end with
        ``padding_token``.
    """
    chars = tf.strings.unicode_split(label, input_encoding="UTF-8")
    ids = char_to_num(chars)
    # NOTE(review): max_len comes from the training labels only; a longer
    # validation/test label would make this negative — confirm none exists.
    n_missing = max_len - tf.shape(ids)[0]
    return tf.pad(ids, paddings=[[0, n_missing]], constant_values=padding_token)

def process_images_labels(image_path, label):
    """Map one (image path, transcription) pair to the model's input dict.

    Returns:
        ``{"input": <preprocessed image>, "label": <padded id sequence>}``;
        the keys match the names of the model's two Input layers.
    """
    return {
        "input": preprocess_image(image_path),
        "label": vectorize_label(label),
    }


def prepare_dataset(image_paths, labels):
    """Build the batched, cached and prefetched tf.data pipeline for one split.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of transcriptions, parallel to ``image_paths``.

    Returns:
        A ``tf.data.Dataset`` yielding dicts with ``"input"`` and ``"label"``
        entries in batches of ``batch_size``.
    """
    pairs = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    examples = pairs.map(process_images_labels, num_parallel_calls=AUTOTUNE)
    batched = examples.batch(batch_size)
    # Caching happens after batching, so the batches themselves are cached.
    return batched.cache().prefetch(AUTOTUNE)

In [None]:
# One input pipeline per split, pairing image paths with cleaned transcriptions.
train_ds = prepare_dataset(train_img_paths, train_labels_cleaned)
validation_ds = prepare_dataset(validation_img_paths, validation_labels_cleaned)
test_ds = prepare_dataset(test_img_paths, test_labels_cleaned)

In [None]:
class CTCLayer(Layer):
    """Pass-through layer that attaches the CTC loss to the model.

    ``call`` computes ``keras.backend.ctc_batch_cost`` between the labels and
    the predictions, registers it with ``add_loss`` and returns the
    predictions unchanged.
    """

    def __init__(self, name=None, **kwargs):
        # Forward the remaining base-layer arguments (e.g. ``trainable``,
        # ``dtype``) so the layer can be re-created from its saved config.
        super().__init__(name=name, **kwargs)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        """Register the CTC loss for this batch and return ``y_pred``.

        Args:
            y_true: Batch of label id sequences; axis 1 is the label axis.
            y_pred: Batch of per-step class scores; axis 1 is the step axis.

        Returns:
            ``y_pred``, untouched.
        """
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # Every sample is given the same lengths: the full step count of
        # y_pred and the full width of y_true, each as a (batch, 1) column.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # The predictions themselves are passed through unchanged.
        return y_pred


def build_model():
    """Build and compile the CNN + bidirectional-GRU recogniser trained with CTC.

    The model has two inputs, ``"input"`` (the image tensor) and ``"label"``
    (the label id sequence). Both are routed through ``CTCLayer``, which
    attaches the loss via ``add_loss``; that is why ``compile`` receives no
    ``loss`` argument.

    Relies on the module-level ``image_width``, ``image_height`` and
    ``char_to_num``.

    Returns:
        The compiled model named ``"handwriting_recognizer"``.
    """
    # Inputs to the model
    # Axis order is (width, height, channel), matching the transposed images
    # produced by distortion_free_resize.
    input_data = Input(name="input", shape=(image_width, image_height, 1))
    labels = Input(name="label", shape=(None,))

    # First conv block.
    # The stride-2 convolution halves both spatial axes.
    cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(2, 2), padding="same", kernel_initializer="he_uniform")(input_data)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = Conv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn)

    # Second conv block.
    cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = Conv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn)

    # Third conv block: strides (2, 4) shrink axis 1 by 2 and axis 2 by 4.
    # From here on the second convolution of each block carries a max-norm
    # kernel constraint and is followed by dropout.
    cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = Conv2D(filters=40, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    # Fourth conv block (no downsampling).
    cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = Conv2D(filters=48, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    # Fifth conv block: again shrinks axis 1 by 2 and axis 2 by 4.
    cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = Conv2D(filters=56, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    # Final conv block: a single convolution, then pooling over axis 2 only.
    cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    
    cnn = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(cnn)

    print(cnn.get_shape())

    shape = cnn.get_shape()

    # Fold axis 2 and the channel axis together so that each position along
    # axis 1 becomes one step of the sequence fed to the recurrent layers.
    bgru = Reshape(( shape[1] , shape[2] * shape[3]))(cnn)

    bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru)
    bgru = Dense(units=256)(bgru)

    bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru)
    

    # Per-step softmax over the lookup vocabulary plus two extra classes.
    # NOTE(review): the +2 presumably reserves room for CTC special tokens
    # such as the blank — confirm.
    output_data = Dense(
        len(char_to_num.get_vocabulary()) + 2, activation="softmax", name="dense2"
    )(bgru)

    # Add CTC layer for calculating CTC loss at each step.
    output = CTCLayer(name="ctc_loss")(labels, output_data)

    # Define the model.
    model = keras.models.Model(
        inputs=[input_data, labels], outputs=output, name="handwriting_recognizer"
    )
    # Optimizer.
    opt = keras.optimizers.Adam()
    # Compile the model and return.
    # No loss argument: the loss is the one registered by CTCLayer.
    model.compile(optimizer=opt)
    return model


# Get the model.
# This instance is only inspected via summary(); a later cell builds a fresh
# one for training.
model = build_model()
model.summary()

In [None]:
# Collect the image and label tensors of every validation batch.
# NOTE: this rebinds `validation_labels`, which until now held the annotation
# lines returned by get_image_paths_and_labels.
validation_images, validation_labels = [], []

for val_batch in validation_ds:
    validation_images.append(val_batch["input"])
    validation_labels.append(val_batch["label"])

In [None]:
epochs = 100

# Fresh training model (replaces the instance built for the summary above).
model = build_model()

# Inference model: from the image input straight to the softmax output, built
# on the layers of `model`, so it needs no label input and has no CTC layer.
prediction_model = keras.models.Model(
    inputs=model.get_layer(name="input").input,
    outputs=model.get_layer(name="dense2").output,
)



In [None]:
# Train the model.
# Each dataset element is a dict whose "input" / "label" keys match the names
# of the model's two Input layers. `history` is inspected in a later cell.
history = model.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=epochs,
)

In [None]:
# Persist the training model: architecture as JSON, full model as HDF5.
model_json = model.to_json()
with open('./weightsBethham_model.json', 'w') as json_file:
    json_file.write(model_json)

# Use the tf.keras `save_model` (via the `keras` alias imported at the top)
# instead of importing from the standalone `keras` package mid-notebook.
network1_saved = keras.models.save_model(model, './weightsBethham_model.hdf5')


# Persist the inference model the same way.
model_json = prediction_model.to_json()
with open('./weightsBethham_prediction_model.json', 'w') as json_file:
    json_file.write(model_json)

# Save `prediction_model` itself here; previously the training model was
# written under the prediction-model file name.
network2_saved = keras.models.save_model(
    prediction_model, './weightsBethham_prediction_model.hdf5'
)

In [None]:
# Rebuild the inference model from its saved JSON architecture ...
with open('../input/aimweights2/weightsBethham_prediction_model.json','r') as json_file:
    json_saved_model = json_file.read()

prediction_model_saved = tf.keras.models.model_from_json(json_saved_model)

# ... and then restore its weights from the matching HDF5 file.
prediction_model_saved.load_weights('../input/aimweights2/weightsBethham_prediction_model.hdf5')


In [None]:
def decode_batch_predictions(pred):
    """Greedy-decode a batch of network outputs into text.

    Args:
        pred: Batch of per-step class scores; axis 0 is the batch and axis 1
            the step axis.

    Returns:
        A list with one decoded string per sample.
    """
    n_samples, n_steps = pred.shape[0], pred.shape[1]
    input_len = np.ones(n_samples) * n_steps

    # Greedy search; beam search is the alternative for harder tasks.
    decoded, _ = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)
    results = decoded[0][:, :max_len]

    output_text = []
    for row in results:
        # Drop the -1 entries before mapping ids back to characters.
        kept = tf.gather(row, tf.where(tf.math.not_equal(row, -1)))
        joined = tf.strings.reduce_join(num_to_char(kept))
        output_text.append(joined.numpy().decode("utf-8"))
    return output_text




In [None]:
# List the metric names recorded by model.fit (requires the training cell to
# have been run in this session).
history.history.keys()

In [None]:
def decode_batch_origin(batch):
    """Turn the padded label ids of a batch back into ground-truth strings.

    Args:
        batch: Dataset element whose ``'label'`` entry holds the id sequences
            padded with ``padding_token``.

    Returns:
        A list with one decoded string per sample.
    """
    out_put = []
    for res in batch['label']:
        # Strip the padding using the same constant vectorize_label pads with,
        # rather than a separately hard-coded copy of its value.
        res = tf.gather(res, tf.where(tf.math.not_equal(res, padding_token)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        out_put.append(res)
    return out_put


In [None]:
# decode_batch_predictions is defined in an earlier cell; the identical copy
# that used to be repeated here was removed so there is a single definition.

#  Let's check results on some test samples.

for batch in test_ds.take(10):
    batch_images = batch["input"]
    _, ax = plt.subplots(4, 4, figsize=(15, 8))

    preds = prediction_model_saved.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)
    original_texts = decode_batch_origin(batch)

    # Show at most 16 samples, and never more than the batch contains, so a
    # short batch no longer raises IndexError.
    n_shown = min(16, len(pred_texts))
    for i, panel in enumerate(ax.flat):
        panel.axis("off")
        if i >= n_shown:
            continue

        # Undo the flip + transpose applied in distortion_free_resize so the
        # word is displayed upright.
        img = batch_images[i]
        img = tf.image.flip_left_right(img)
        img = tf.transpose(img, perm=[1, 0, 2])
        img = (img * 255.0).numpy().clip(0, 255).astype(np.uint8)
        img = img[:, :, 0]

        title = f"Prediction: {pred_texts[i]} \n Origin: {original_texts[i]}"
        panel.imshow(img, cmap="gray")
        panel.set_title(title)
plt.show()

In [None]:
#  Word- and character-level accuracy over the test set.
word_count = 0
word_total = 0
character_count = 0
character_total = 0
for batch in test_ds:
    batch_images = batch["input"]
    preds = prediction_model_saved.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)
    original_texts = decode_batch_origin(batch)

    # Score every sample of the batch (not just the first 16); zip also copes
    # with a final batch of any size.
    for predicted, original in zip(pred_texts, original_texts):
        word_total += 1
        if predicted == original:
            word_count += 1

        # Position-wise comparison. Every ground-truth character counts
        # towards the total, so characters missing from a too-short
        # prediction are scored as errors instead of being skipped.
        character_total += len(original)
        character_count += sum(o == p for o, p in zip(original, predicted))

# Guard the ratios so an empty test set reports 0 instead of raising.
character_accuracy = character_count / character_total * 100 if character_total else 0.0
word_accuracy = word_count / word_total * 100 if word_total else 0.0

print('Character_count: ', character_count )
print('Character_total: ', character_total )
print('Character Accuracy: ', character_accuracy )

print('Word_count: ', word_count )
print('Word_total: ', word_total )
print('Word Accuracy: ', word_accuracy )



In [None]:
# image = preprocess_image('../input/test-data/image.png')
# image = tf.expand_dims(image, axis=0)





# _, ax = plt.subplots(4, 4, figsize=(15, 8))

# preds = prediction_model_saved.predict(image)
# pred_texts = decode_batch_predictions(preds)

# for i in range(1):
#     img = image[i]
#     img = tf.image.flip_left_right(img)
#     img = tf.transpose(img, perm=[1, 0, 2])
#     img = (img * 255.0).numpy().clip(0, 255).astype(np.uint8)
#     img = img[:, :, 0]

#     title = f"Prediction: {pred_texts[i]} \n Origin: {original_texts[i]}"
#     ax[i // 4, i % 4].imshow(img, cmap="gray")
#     ax[i // 4, i % 4].set_title(title)
#     ax[i // 4, i % 4].axis("off")
# plt.show()