In [1]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression
# Location of the input image, kept under one name so the failure message can report it.
IMAGE_PATH = 'C:/Users/omnia.abdelrahman/Pictures/OCR/Combining+EAST+and+CRNN/OCR_LNP.png'
img = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise when the file is missing or unreadable; it returns None.
# Fail here with a clear message instead of at a later `img.shape` access.
if img is None:
    raise FileNotFoundError('Could not read image: ' + IMAGE_PATH)

In [2]:
## -------------  Load the pre-trained models ---------------
# `model`  : network read from frozen_east_text_detection.pb
# `model1` : network read from crnn.onnx
model = cv2.dnn.readNet(
    'C:/Users/omnia.abdelrahman/Pictures/OCR/Combining+EAST+and+CRNN/frozen_east_text_detection.pb'
)
model1 = cv2.dnn.readNet(
    'C:/Users/omnia.abdelrahman/Pictures/OCR/Combining+EAST+and+CRNN/crnn.onnx'
)
# Show both network handles, one per line.
print(model, model1, sep='\n')

<dnn_Net 00000124764CD1B0>
<dnn_Net 00000124764CD0B0>


In [3]:
## -------------  Prepare the image  ----------------
# The network input size is the image size rounded down to a multiple of 32.
# The max(32, ...) floor keeps an image with a side shorter than 32 px from
# producing a 0-sized input and a ZeroDivisionError in the ratios below.
height, width, _ = img.shape
new_height = max(32, (height//32)*32)
new_width = max(32, (width//32)*32)
print('new_height: ',new_height,' , new_width: ', new_width)

# Ratio between the original size and the network input size; multiplying a
# coordinate measured on the network input by it gives the original-image coordinate.
h_ratio = height/new_height
w_ratio = width/new_width
print('h_ratio: ',h_ratio,' , w_ratio: ', w_ratio)

# Build the 4-dimensional input blob: resize to (new_width, new_height), subtract
# the per-channel mean, swap the R and B channels, do not crop.
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1,
    size=(new_width, new_height),
    mean=(123.68, 116.78, 103.94),
    swapRB=True,
    crop=False,
)

new_height:  480  , new_width:  352
h_ratio:  1.025  , w_ratio:  1.0227272727272727


In [4]:
## -------------  Forward Propagation  -------------
model.setInput(blob)
# Ask for the two EAST outputs by name, in the order they are unpacked below.
# Unpacking the result of getUnconnectedOutLayersNames() directly would depend on
# whatever order that call happens to return for the loaded graph.
EAST_OUTPUT_LAYERS = [
    "feature_fusion/concat_3",         # geometry map (5 channels)
    "feature_fusion/Conv_7/Sigmoid",   # score map (1 channel)
]
(geometry, scores) = model.forward(EAST_OUTPUT_LAYERS)

In [5]:
# Display the shape of the geometry output returned by the forward pass.
geometry.shape

(1, 5, 120, 88)

In [6]:
# Display the shape of the scores output returned by the forward pass.
scores.shape

(1, 1, 120, 88)

In [7]:
## -------------   Post-Processing  ----------------
# Scan the output maps cell by cell. Cells scoring below 0.1 are dropped; for the
# others, the cell position scaled by 4 is offset by geometry channels 0-3 to form
# an axis-aligned box (x1, y1, x2, y2). Geometry channel 4 is not read here.
rectangles = []
confidence_score = []
n_rows, n_cols = geometry.shape[2], geometry.shape[3]
for row in range(n_rows):
    for col in range(n_cols):
        cell_score = scores[0, 0, row, col]
        if cell_score < 0.1:
            continue

        # Top-left corner: subtract channel 3 from x and channel 0 from y.
        top_x = int(col*4 - geometry[0, 3, row, col])
        top_y = int(row*4 - geometry[0, 0, row, col])

        # Bottom-right corner: add channel 1 to x and channel 2 to y.
        bottom_x = int(col*4 + geometry[0, 1, row, col])
        bottom_y = int(row*4 + geometry[0, 2, row, col])

        rectangles.append((top_x, top_y, bottom_x, bottom_y))
        confidence_score.append(float(cell_score))

# Collapse overlapping candidates into the final boxes with non-max suppression.
fin_boxes = non_max_suppression(
    np.array(rectangles),
    probs=confidence_score,
    overlapThresh=0.5,
)

In [8]:
# Display the boxes returned by non-max suppression.
fin_boxes

array([[142, 368, 198, 391],
       [ 67, 109, 188, 134],
       [ 69, 324, 218, 352],
       [ 91,  67, 188,  91],
       [197, 110, 270, 134],
       [223, 324, 272, 349],
       [197,  67, 245,  91]])

In [9]:
## --------------- Load the CRNN decoding functions -------------
def most_likely(scores, char_set):
    """Pick the highest-scoring character at every step.

    For each index ``step`` along the first axis of ``scores``, the argmax of
    ``scores[step][0]`` is used as an index into ``char_set``.

    Returns:
        The selected characters concatenated in step order. Repeats and blank
        symbols are kept; no collapsing is done here.
    """
    picks = (
        char_set[np.argmax(scores[step][0])]
        for step in range(scores.shape[0])
    )
    return "".join(picks)

def map_rule(text, blank='-'):
    """Collapse a per-step character string into the final text.

    A character is kept only when it is not the blank symbol and differs from
    the character immediately before it. A blank between two equal characters
    therefore keeps both of them ("a-a" -> "aa"), while adjacent duplicates are
    merged ("aa" -> "a").

    Args:
        text: String of per-step characters, possibly containing ``blank``.
        blank: The symbol treated as blank. Defaults to '-', the value that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        The collapsed string; empty when ``text`` is empty or only blanks.
    """
    char_list = []
    prev = None  # no previous character before the first one
    for ch in text:
        if ch != blank and ch != prev:
            char_list.append(ch)
        prev = ch
    return ''.join(char_list)

def best_path(scores, char_set):
    """Decode ``scores`` to text: per-step argmax, then collapse.

    Runs ``most_likely`` to get one character per step and passes the result
    through ``map_rule``.
    """
    return map_rule(most_likely(scores, char_set))

In [10]:
# char_set is the blank symbol followed by the digits and lowercase letters,
# so the blank sits at index 0.
blank = '-'
alphabet_set = "0123456789abcdefghijklmnopqrstuvwxyz"

char_set = "".join((blank, alphabet_set))

In [11]:
## ---------------  Recognize the text using CRNN in each segment -----------
img_copy = img.copy()
img_h, img_w = img.shape[:2]
for (x1, y1, x2, y2) in fin_boxes:

    # Scale the box back to original-image coordinates and clamp it to the image.
    # A negative start index would be read by the slice below as "count from the
    # end" and select the wrong region.
    x1 = min(max(int(x1 * w_ratio), 0), img_w)
    y1 = min(max(int(y1 * h_ratio), 0), img_h)
    x2 = min(max(int(x2 * w_ratio), 0), img_w)
    y2 = min(max(int(y2 * h_ratio), 0), img_h)

    # Skip boxes with no area left after clamping; cv2.cvtColor fails on an empty crop.
    if x2 <= x1 or y2 <= y1:
        continue

    segment = img[y1:y2, x1:x2, :]

    segment_gray = cv2.cvtColor(segment, cv2.COLOR_BGR2GRAY)
    # Use names distinct from the detector's `blob` and `scores`, so the
    # post-processing cell can still be re-run after this one.
    segment_blob = cv2.dnn.blobFromImage(segment_gray, scalefactor=1/127.5, size=(100,32), mean=127.5)

    model1.setInput(segment_blob)
    crnn_scores = model1.forward()
    text = best_path(crnn_scores, char_set)

    # Draw the box and its recognized text on the copy, leaving `img` untouched.
    cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(img_copy, text.strip(), (x1,y1-2), cv2.FONT_HERSHEY_COMPLEX, 0.7, (0,0,255),2)

cv2.imshow("Text Detection", img_copy)
cv2.waitKey(0)
# Close the window after the key press so it does not linger once the cell finishes.
cv2.destroyAllWindows()

-1