In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from difflib import SequenceMatcher
tf.version.VERSION

'2.9.1'

In [2]:
# Path to the data directory
data_dir = Path("./Testing_lines/")

# Standard vocab to ensure correct encoding
vocab = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "+", " "]

# Get list of all the images. glob() order is arbitrary, so sort to keep the
# evaluation order stable between runs.
images = sorted(str(path) for path in data_dir.glob("*.png"))

# The ground-truth text of each line is its file name without the ".png"
# suffix. Path.stem removes only the final suffix, so a name that happens to
# contain ".png" earlier on is not cut short (str.split(".png")[0] would be).
labels = [Path(img).stem for img in images]

# Fail early with a readable message instead of the opaque
# "max() arg is an empty sequence" raised further down.
if not labels:
    raise ValueError(f"No .png images found in {data_dir}")

characters = set(char for label in labels for char in label)

print("Number of images found: ", len(images))
print("Number of labels found: ", len(labels))
print("Number of unique characters: ", len(characters))
print("Characters present: ", characters)

# Characters outside the fixed vocab cannot be encoded faithfully: a lookup
# built from `vocab` treats them as out-of-vocabulary. Surface that here
# rather than letting it silently distort the evaluation.
unknown_characters = characters - set(vocab)
if unknown_characters:
    print("Warning: characters not in vocab: ", sorted(unknown_characters))

# Batch size for training and validation
batch_size = 1

# Desired image dimensions
img_width = 499
img_height = 60

# Factor by which the image is going to be downsampled
# by the convolutional blocks. We will be using two
# convolution blocks and each block will have
# a pooling layer which downsamples the features by a factor of 2.
# Hence total downsampling factor would be 4.
downsample_factor = 4
# Maximum length of any line in the dataset
max_length = max(len(label) for label in labels)
print(max_length)


Number of images found:  100
Number of labels found:  100
Number of unique characters:  28
Characters present:  {'s', 'g', 'm', 'w', '+', 'c', 'x', 'a', 'u', 'y', ' ', 'p', 'h', 'i', 'j', 'r', 'f', 'd', 'v', 'b', 'l', 'k', 'o', 'q', 'e', 't', 'z', 'n'}
21


In [3]:
# Preprocessing ----------------------------------------------------------
# Forward lookup: character -> integer index, built from the fixed vocab.
# mask_token=None: no vocabulary entry is reserved as a mask token.
char_to_num = layers.StringLookup(vocabulary=vocab, mask_token=None)

# Inverse lookup: integer index -> character. It reuses the vocabulary of
# char_to_num so that encoding and decoding agree on every index.
num_to_char = layers.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=char_to_num.get_vocabulary(),
)


def encode_single_sample(img_path, label):
    """Load one line image and encode its text label for the model.

    Args:
        img_path: Path of a PNG file (string tensor).
        label: Text of the line shown in the image (string tensor).

    Returns:
        A dict with two entries:
          "image": float32 grayscale image, resized to
                   (img_height, img_width) and then transposed so the
                   width axis comes first.
          "label": the label's characters mapped to integers by char_to_num.
    """
    # Read the file and decode it as a single-channel (grayscale) PNG.
    raw_bytes = tf.io.read_file(img_path)
    gray = tf.io.decode_png(raw_bytes, channels=1)
    # Convert to float32 (convert_image_dtype also rescales to [0, 1]).
    gray = tf.image.convert_image_dtype(gray, tf.float32)
    # Bring every image to the common size.
    resized = tf.image.resize(gray, [img_height, img_width])
    # Swap height and width: the time dimension should follow the image width.
    width_major = tf.transpose(resized, perm=[1, 0, 2])
    # Split the label into characters and map each one to its integer index.
    label_chars = tf.strings.unicode_split(label, input_encoding="UTF-8")
    encoded_label = char_to_num(label_chars)
    # The model is fed through a dict keyed by input name.
    return {"image": width_major, "label": encoded_label}



# Create Dataset object --------------------------------------------------------
# Pair every image path with its label, encode the pairs in parallel, then
# batch and prefetch so preprocessing can overlap with inference.
# NOTE(review): labels are not padded, so plain .batch() presumably only works
# while batch_size is 1 or all labels share one length — confirm before
# raising batch_size.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((images, labels))
    .map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)




In [4]:
# Load the saved model from the "trained_model" path (relative to the
# working directory) and print its layer summary.
model = keras.models.load_model("trained_model")
model.summary()

Model: "ocr_model_v1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image (InputLayer)             [(None, 499, 60, 1)  0           []                               
                                ]                                                                 
                                                                                                  
 Conv1 (Conv2D)                 (None, 499, 60, 32)  320         ['image[0][0]']                  
                                                                                                  
 pool1 (MaxPooling2D)           (None, 249, 30, 32)  0           ['Conv1[0][0]']                  
                                                                                                  
 Conv2 (Conv2D)                 (None, 249, 30, 64)  18496       ['pool1[0][0]']       

In [6]:
# Inference ----------------------------------
# Build the prediction model from the loaded model: it runs from the
# "image" input layer up to the output of the "dense2" layer.
prediction_model = keras.models.Model(
    inputs=model.get_layer(name="image").input,
    outputs=model.get_layer(name="dense2").output,
)

# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    """Decode a batch of network outputs into text with greedy CTC decoding.

    Args:
        pred: Array returned by the prediction model. Its first axis is
            treated as the batch and its second axis as the sequence length
            (assumes the (samples, time_steps, classes) layout that
            ctc_decode expects — TODO confirm).

    Returns:
        A list with one decoded string per batch entry. Each decoded sequence
        is cut to at most max_length positions before being mapped back to
        characters with num_to_char.
    """
    batch_count = pred.shape[0]
    time_steps = pred.shape[1]
    # Every sequence in the batch spans the full time axis.
    sequence_lengths = np.ones(batch_count) * time_steps

    # Use greedy search. For complex tasks, beam search is an alternative.
    decoded_sequences = keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )[0]
    clipped = decoded_sequences[0][:, :max_length]

    # Map each row of indices back to characters and join them into a string.
    return [
        tf.strings.reduce_join(num_to_char(row)).numpy().decode("utf-8")
        for row in clipped
    ]

print("Predicted labels, true labels, and similarity according to SequenceMatcher's ratio()")
# Running totals for the mean similarity. Deliberately not named `sum`, which
# would shadow the built-in sum() for the rest of the kernel session.
similarity_total = 0.0
sample_count = 0
# NOTE(review): in the recorded output the true labels end with a space that
# the predictions lack, which keeps otherwise exact matches below 1.0 — check
# whether the image file names carry a trailing space.
for batch in test_dataset:
    batch_images = batch["image"]
    batch_labels = batch["label"]

    preds = prediction_model.predict(batch_images)
    # Strip "[UNK]" markers from the decoded text (presumably padding from
    # ctc_decode mapped through num_to_char — TODO confirm).
    pred_texts = [
        text.replace("[UNK]", "") for text in decode_batch_predictions(preds)
    ]

    # Turn the encoded ground-truth labels back into plain strings.
    orig_texts = [
        tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        for label in batch_labels
    ]

    for pred_text, orig_text in zip(pred_texts, orig_texts):
        # Compute the ratio once and reuse it for the total and the report.
        ratio = SequenceMatcher(None, pred_text, orig_text).ratio()
        similarity_total += ratio
        print(f"{pred_text}, {orig_text}, {ratio}")
        sample_count += 1


# Guard the mean: an empty dataset would otherwise raise ZeroDivisionError.
if sample_count:
    print(f"{similarity_total}/{sample_count} = {similarity_total / sample_count}")
else:
    print("No samples were evaluated; mean similarity is undefined.")

Predicted labels, true labels, and similarity according to SequenceMatcher's ratio()
+ g+rl cv jijnt+qn, + g+rl cv jij nt+qn , 0.9473684210526315
+mwp vrhpz fedsjp, +mwp vrhpz fedsjp , 0.9714285714285714
+tfu     fkzfgm, +tfu      fkzfgv , 0.875
a cavkzbd +g sjjdl, a ca vkzbd +g sjjdl , 0.9473684210526315
a dpwvqtemh relrb, a dpwvqt e+h relrb , 0.8888888888888888
agydguat  glti, a g f da uat    glti , 0.6857142857142857
a loobww im fvamfd, a loobww iy fvamfd , 0.918918918918919
aenkne fjpbrwpkm, aenk ne fj p brwpkm , 0.8888888888888888
ahsqh xggqpg vgxv, ahsqh xggqpg vgxv , 0.9714285714285714
bdbirt rf+ smafmz, bdbirt rf+ smafmz , 0.9714285714285714
bgbtk fpdb tfjkh, bgbtk fpdb tfjkih , 0.9411764705882353
bin hwmqunamkex, bin  hwmquh amkex , 0.8484848484848485
bjcve      ar+n, bjcve       ar+n , 0.9375
bv+ ormmk uypho bq, bv+ ormmk uypho bq , 0.972972972972973
ce vjlhb rzue spen, ce vjlhb rzue spen , 0.972972972972973
cklo d+nsg cpumec, cklo d+nsg cpumec , 0.9714285714285714
cug cz kpn

sc f xflo grmmd uu, sc f xflo gdmmd fuu , 0.8947368421052632
sjobvpzt q zxenpw, sj obvpzt q zxenpw , 0.9444444444444444
tdbn hv vj bqoucb, tdben hv vj bqoucb , 0.9444444444444444
thjsxf ko qlaghfj, thj sxf ko qlaghf j , 0.918918918918919
tkt+rw ca vr+gl sr, tkt+rw ca vr+gl sr , 0.972972972972973
tvpy bmg p bqid ar+, tvpy bmg p bqid ad+ , 0.9230769230769231
uedthgm zvzsp lhm, uedthg m zvzsp lhm , 0.9444444444444444
ujpasz ph+zif zvp, ujpasz ph+zif zvp , 0.9714285714285714
uudb zoyg hslwv kg, uudb zoyg hslwv kg , 0.972972972972973
uumcwf cqdrd wucj, uumcwf cqdrd wucj , 0.9714285714285714
vaf   t vut yqpv, vaf     t vut yqpv , 0.9142857142857143
vko wma achh bziju, vko wma achh bziju , 0.972972972972973
vp xvmi sre xoqnzi, vp xvyi sre xoqhzi , 0.8648648648648649
vr+m dic++e cwyib, vr+m dic++e cwyib , 0.9714285714285714
vx kkrmsb    wm, vx kkdmsb      wm , 0.8484848484848485
vzt zmf hqgrjm mcl, vzt zmf hqgrjm mcl , 0.972972972972973
w xzn pqbm o nyzkrg, w xzn pqbm o nyzkrg , 0.974358974358