In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from difflib import SequenceMatcher


# Fixed vocabulary so that character <-> integer encoding is stable:
# the 26 lowercase Latin letters followed by "+" and a space.
# NOTE(review): the order presumably has to match the one used when the
# saved model was trained -- confirm before changing it.
vocab = list("abcdefghijklmnopqrstuvwxyz+ ")

# Number of samples per batch when the dataset is batched.
batch_size = 1

# Target image size (in pixels) that every input image is resized to.
img_width, img_height = 499, 60

# Factor by which the convolutional blocks downsample the image.
# Two convolution blocks are used, each with a pooling layer that
# downsamples the features by a factor of 2, so the total factor is 4.
downsample_factor = 4

# Preprocessing ----------------------------------------------------------
# Lookup layer: characters -> integer indices.
char_to_num = layers.StringLookup(vocabulary=vocab, mask_token=None)

# Inverse lookup layer: integer indices -> characters, built from the
# vocabulary reported by the forward lookup so both stay in sync.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None,
    invert=True,
)


def encode_single_sample(img_path, label):
    """Load one image file and encode its label for the model.

    Args:
        img_path: Path of the image file to read.
        label: Text label belonging to the image.

    Returns:
        A dict with two entries: ``"image"``, the grayscale float32 image
        resized to ``img_height`` x ``img_width`` and transposed so that
        width is the first axis, and ``"label"``, the label's characters
        mapped to integers by ``char_to_num``.
    """
    # NOTE(review): the inference loop in this file globs "*.jpg" while this
    # uses decode_png; the TensorFlow docs state decode_png also accepts
    # JPEG data -- confirm for the TF version in use.
    raw_bytes = tf.io.read_file(img_path)
    grayscale = tf.io.decode_png(raw_bytes, channels=1)

    # Scale to float32 in the [0, 1] range, then bring to the fixed size.
    as_float = tf.image.convert_image_dtype(grayscale, tf.float32)
    resized = tf.image.resize(as_float, [img_height, img_width])

    # Swap height and width so the time dimension corresponds to the
    # width of the image.
    width_first = tf.transpose(resized, perm=[1, 0, 2])

    # Split the label into single characters and map them to integers.
    characters = tf.strings.unicode_split(label, input_encoding="UTF-8")
    encoded_label = char_to_num(characters)

    # Both entries are returned because the model expects two inputs.
    return {"image": width_first, "label": encoded_label}

# Load the previously saved model from "trained_model".
model = keras.models.load_model("trained_model")

# Maximum number of decoded characters kept per line. During training this
# would come from the labels; here the labels are irrelevant, so it is set
# to a large value so that lines of any length can be predicted.
max_length = 50

# Inference ----------------------------------
# Build the prediction model: everything from the "image" input layer up to
# the output of the "dense2" layer.
prediction_model = keras.models.Model(
    inputs=model.get_layer(name="image").input,
    outputs=model.get_layer(name="dense2").output,
)
# prediction_model.summary()

def decode_batch_predictions(pred):
    """Decode a batch of network outputs into text strings.

    Args:
        pred: Output of ``prediction_model`` for one batch; its first two
            dimensions are read as batch size and number of time steps.

    Returns:
        A list with one decoded string per batch entry, each limited to
        ``max_length`` decoded positions.
    """
    n_samples, n_steps = pred.shape[0], pred.shape[1]
    # Every sample uses the full sequence length.
    sequence_lengths = np.ones(n_samples) * n_steps

    # Greedy search; for complex tasks beam search can be used instead.
    decoded = keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )[0][0]
    truncated = decoded[:, :max_length]

    # Map each index sequence back to characters and join them into one string.
    return [
        tf.strings.reduce_join(num_to_char(indices)).numpy().decode("utf-8")
        for indices in truncated
    ]




# Directory tree that is searched for "Cropped" folders of images.
rootdir = './Preproc_Outputs'
# Folder (relative to the current working directory) receiving the .txt files.
rootdir2 = 'results'

os.makedirs(rootdir2, exist_ok=True)

save_path = os.getcwd()
fin_out_complete = os.path.join(save_path, rootdir2)

# Translation table from the model's Latin placeholder characters to Hebrew
# letters. A table is applied in a single pass, so the result does not
# depend on the order of the entries ("+" and " " are left unchanged).
LATIN_TO_HEBREW = str.maketrans({
    "a": "א",
    "b": "ע",
    "c": "ב",
    "d": "ד",
    "e": "ג",
    "f": "ה",
    "g": "ח",
    "h": "כ",
    "i": "ך",
    "j": "ל",
    "k": "ם",
    "l": "מ",
    "m": "ן",
    "n": "נ",
    "o": "פ",
    "p": "ף",
    "q": "ק",
    "r": "ר",
    "s": "ס",
    "t": "ש",
    "u": "ת",
    "v": "ט",
    "w": "ץ",
    "x": "צ",
    "y": "ו",
    "z": "י",
})


def to_hebrew_line(pred_text):
    """Convert one decoded prediction into a Hebrew text line.

    Removes every "[UNK]" token, maps the Latin placeholder characters to
    Hebrew letters and reverses the character order.

    Args:
        pred_text: Decoded prediction string.

    Returns:
        The converted, reversed string.
    """
    cleaned = pred_text.replace("[UNK]", "")
    # NOTE(review): the reversal presumably converts the left-to-right
    # reading order of the image into right-to-left Hebrew order -- confirm.
    return cleaned.translate(LATIN_TO_HEBREW)[::-1]


def write_right_aligned(path, lines):
    """Write ``lines`` to ``path`` (UTF-8), right-justified to the longest line.

    Nothing is written (and no file is created) when ``lines`` is empty.

    Args:
        path: Destination file path; an existing file is overwritten.
        lines: Sequence of strings, written one per line.
    """
    if not lines:
        return
    width = max(len(line) for line in lines)
    with open(path, 'w', encoding='utf-8') as fo:
        fo.writelines(line.rjust(width) + "\n" for line in lines)


for root, dirs, files in os.walk(rootdir):
    temp_lines = []
    # Removing the entry in place stops os.walk from descending into it.
    if "combined" in dirs:
        dirs.remove("combined")
    if "Cropped" not in dirs:
        continue

    folderName = os.path.split(root)[1]
    data_dir = Path(root) / "Cropped"
    images = sorted(map(str, data_dir.glob("*.jpg")))
    if not images:
        # An empty file list cannot be fed through the tf.data pipeline.
        print(f"No .jpg images found in {data_dir}; skipping")
        continue

    # The file name (without ".jpg") is passed as the label only because
    # encode_single_sample requires one; it is not used for the prediction.
    labels = [img.split(os.path.sep)[-1].split(".jpg")[0] for img in images]
    test_dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    test_dataset = (
        test_dataset.map(encode_single_sample)
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )
    print("Predicted labels")
    print(folderName)

    for batch in test_dataset:
        preds = prediction_model.predict(batch["image"])
        temp_lines.extend(
            to_hebrew_line(text) for text in decode_batch_predictions(preds)
        )

    # Write the file once per folder, after all lines are known, instead of
    # rewriting the whole file after every single recognized line.
    completeName = os.path.join(fin_out_complete, str(folderName) + ".txt")
    write_right_aligned(completeName, temp_lines)
    print(temp_lines)

Predicted labels
P168-Fg016-R-C01-R01-binarized.jpg
[' והוה    י', 'פךרסדאשודב', 'הגבאחםל', 'שוחבגסנליש', 'ןעוטדאעחטהושע', 'סטטגץןןנר', 'גהנלוס הנשה', 'עלחקור וורג', 'גץ םל  ']
