<a href="https://colab.research.google.com/github/MonicaSai7/OCR-using-CRNN/blob/master/Evaluate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab VM at /content/gdrive.
# This call is interactive: it prompts for an authorization code.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


# Libraries

In [0]:
import os
import string
import cv2
import h5py
import fnmatch
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, BatchNormalization, Lambda, Bidirectional, LSTM, Dense

# Data Acquisition

In [0]:
# Load the test images and their text labels from the HDF5 archive.
# `Dataset.value` was deprecated in h5py 2.x and removed in h5py 3.0; indexing
# with `[()]` reads the whole dataset into memory on every h5py version.
# Indexing the file by key (instead of `f.get`) raises a clear KeyError if a
# dataset is missing, rather than an AttributeError on None.
with h5py.File('mjsynth_test.hdf5', 'r') as f:
    print(list(f.keys()))
    test_img = f['test_img'][()]
    test_labels = f['test_labels'][()]

# h5py >= 3 returns string datasets as bytes; decode them so the rest of the
# notebook always works with str labels (no-op when labels are already str).
# NOTE(review): assumes test_labels is a 1-D array of labels - confirm.
if test_labels.size and isinstance(test_labels.flat[0], bytes):
    test_labels = np.array([label.decode('utf-8') for label in test_labels], dtype=object)

['test_img', 'test_labels']


In [0]:
# Peek at the first sample. Only the last expression of a cell is rendered,
# so the label is shown and the image array on the first line is not.
test_img[0]
test_labels[0]

'kaunas'

In [0]:
# Alphabet used for label encoding: a-z, A-Z, then 0-9.
# A character's position in this string is its integer class id.
char_list = ''.join((string.ascii_letters, string.digits))
print(char_list, len(char_list), sep='\n')

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
62


In [0]:
def encode_to_labels(text):
    """Return the list of `char_list` positions for each character of `text`.

    The lookup uses `char_list.index`, so a character that is not present in
    `char_list` propagates the error raised by that call.
    """
    return [char_list.index(symbol) for symbol in text]

In [0]:
# Length of the longest label in the test set; used as the padded label width.
length_checker = np.vectorize(len)
max_label_len = max_test_label_len = max(length_checker(test_labels))

Label length is the number of characters in each ground-truth text label. Input length is the number of time steps fed to the LSTM layers; it is the same for every sample and equals 31 in our architecture.

In [0]:
# Per-sample bookkeeping for the test set:
#   test_label_length - number of characters in each label
#   test_input_length - constant 31 for every sample (sequence length fed to the LSTMs)
#   test_txt          - each label encoded as a list of char_list indices
test_label_length = [len(label) for label in test_labels]
test_input_length = [31] * len(test_labels)
test_txt = [encode_to_labels(label) for label in test_labels]

In [0]:
# Right-pad every encoded label to max_label_len. The filler value is
# len(char_list), i.e. one past the last valid character index.
test_padded_txt = pad_sequences(
    test_txt,
    maxlen=max_label_len,
    padding='post',
    value=len(char_list),
)

# Model Architecture

### Model = CNN + RNN + CTC loss

In [0]:
# CRNN inference graph: a convolutional feature extractor, two bidirectional
# LSTM layers, and a per-time-step softmax over the characters.
# input with shape of height=32 and width=128 (single channel)
inputs = Input(shape=(32,128,1))
 
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# pooling layer with kernel size (2,2): halves both height and width
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1): halves only the height, keeping the width
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
 
# (2,2) kernel with no explicit padding argument (Keras default): with a 32x128
# input the remaining height of 2 collapses to 1 and the width shrinks 32 -> 31
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
# drop the height axis (axis 1, now of size 1) so the width becomes the sequence axis
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
 
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
# one softmax output per character in char_list, plus one extra class
# (presumably the CTC blank - the graph is trained with K.ctc_batch_cost)
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
 
# model used for prediction: image in, per-time-step class probabilities out
act_model = Model(inputs, outputs)

In [0]:
# Extra inputs consumed by the CTC loss: the padded label sequence, and the
# per-sample lengths of the prediction sequence and of the label.
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    """Unpack (y_pred, labels, input_length, label_length) and return the CTC batch cost.

    Note that K.ctc_batch_cost takes the labels first and the predictions second,
    which is the reverse of the order in `args`.
    """
    y_pred, labels, input_length, label_length = args
 
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 
# Wrap the loss in a Lambda layer so it becomes the output of a model that takes
# the image plus the three auxiliary inputs defined above.
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [0]:
# Inspect the Python type of the `labels` input created above.
type(labels)

tensorflow.python.framework.ops.Tensor

In [0]:
# load the saved best model weights
act_model.load_weights('best_model.hdf5')

# predict outputs on the test images
prediction = act_model.predict(test_img)
 
# use CTC decoder (greedy); every sample is given the full sequence length
# prediction.shape[1] as its input length, and [0][0] selects the first
# decoded tensor returned by K.ctc_decode
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])

In [0]:
# Decode every prediction, print it next to its ground-truth label, and report
# the case-insensitive exact-match accuracy over the test set.
count = 0
i = 0  # stays 0 when there are no predictions, so the summary line still works
for i, x in enumerate(out, start=1):
    # -1 is skipped as a non-character id; every other id indexes into char_list
    decoded = ''.join(char_list[int(p)] for p in x if int(p) != -1)
    truth = test_labels[i - 1]
    print(truth, decoded)
    if truth.lower() == decoded.lower():
        count += 1
# Guard against an empty prediction set instead of raising ZeroDivisionError
print(count,'/',i,'=',count/i if i else float('nan'))

## Check a dynamically uploaded image

In [0]:
# Load one image from disk and bring it into the format the model expects:
# grayscale, 32 (height) x 128 (width), one channel, values scaled by 1/255.
addr = 'test/141_Correspondents_17170.jpg'
test_label = "Correspondents"

# Target size matching the model's Input(shape=(32, 128, 1))
target_height = 32
target_width = 128

img = cv2.imread(addr)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError('Could not read image: {}'.format(addr))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# cv2.resize expects dsize as (width, height)
dsize = (target_width, target_height)
img = cv2.resize(img, dsize)
# add the trailing channel axis: (32, 128) -> (32, 128, 1)
img = np.expand_dims(img, axis=2)
# scale pixel values by 1/255
img = img/255.

In [0]:
# Confirm the preprocessed image matches the model's (32, 128, 1) input shape.
img.shape

(32, 128, 1)

In [0]:
# load the saved best model weights
act_model.load_weights('best_model.hdf5')

# predict on the single preprocessed image; np.newaxis adds the leading batch axis
prediction = act_model.predict(img[np.newaxis,:,:])
 
# use CTC decoder (greedy); the input length is the full sequence length
# prediction.shape[1], and [0][0] selects the first decoded tensor
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])

In [0]:
# Decode the prediction for the uploaded image and print it next to the expected text.
for x in out:
    # -1 is skipped as a non-character id; every other id indexes into char_list
    r = [char_list[int(p)] for p in x if int(p) != -1]
    print(test_label,''.join(r) )

Correspondents Correspondents
