# Sachin Jeshani

# OpenCV OCR and Gujarati text recognition with Tesseract

- Supporting packages

In [1]:
import cv2
import pytesseract
import imutils         
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse

from PIL import ImageFont, ImageDraw, Image

%matplotlib inline
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

- Input image path

In [2]:
# path of the input image, relative to the working directory; note that the
# same name `image` is later rebound to the pixel array returned by cv2.imread
image = '2021-01-20/1B0E39B.jpg'

- Text geometry prediction

In [3]:
# default score threshold used by decode_predictions when none is passed
min_confidence = 0.9


def decode_predictions(scores, geometry, threshold=None):
    """Turn text-detector output maps into bounding boxes and their scores.

    Args:
        scores: 4-D array read as ``scores[0, 0, y, x]``; the score of the
            feature-map cell at row ``y``, column ``x``.
        geometry: 4-D array read as ``geometry[0, k, y, x]``. Channels 0 and
            2 are summed to give the box height, channels 1 and 3 to give
            the box width, and channel 4 is the rotation angle passed to
            ``np.cos`` / ``np.sin`` (i.e. radians).
        threshold: minimum score a cell must reach to produce a box. When
            ``None`` (the default) the module-level ``min_confidence`` is
            read at call time, which is the original behaviour.

    Returns:
        A ``(rects, confidences)`` tuple of two parallel lists: ``rects``
        holds ``(startX, startY, endX, endY)`` integer tuples, in row-major
        order of the cells that passed the threshold, and ``confidences``
        holds the matching score values. Coordinates are not clamped and
        may be negative.
    """
    if threshold is None:
        threshold = min_confidence

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(numRows):
        # per-row views of the scores and of the five geometry channels
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(numCols):
            # ignore cells whose score is below the threshold
            if scoresData[x] < threshold:
                continue

            # the feature maps are 4x smaller than the input image, so
            # scale the cell index back up to input-image pixels
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # rotation angle of the prediction and its sine / cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # width and height of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # end corner first, then step back by the box size to get the
            # start corner; int() truncates toward zero
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)

- Loading and resizing the image

In [4]:
# load the input image and grab the image dimensions; cv2.imread reports a
# missing or unreadable file by returning None rather than raising, so fail
# here with a clear message instead of an AttributeError on the next line
image_path = image
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image: {}".format(image_path))
orig = image.copy()
(origH, origW) = image.shape[:2]
# round each dimension down to a multiple of 32 (presumably what the text
# detector used later expects -- confirm), but never below 32 so that an
# image smaller than 32 px on a side does not yield a zero size and a
# division by zero in the ratios
(newW, newH) = (max(32, (origW // 32) * 32), max(32, (origH // 32) * 32))
# ratios used later to map detections back onto the original image
rW = origW / float(newW)
rH = origH / float(newH)
# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

- Import EAST text detector model

In [5]:
# output layers to request from the EAST network, in this order:
#   1. the sigmoid layer holding the per-cell text probabilities
#   2. the concat layer from which the box coordinates are derived
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# read the frozen, pre-trained EAST graph from the working directory
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet('frozen_east_text_detection.pb')

[INFO] loading EAST text detector...


- Region of interest box prediction

In [6]:
# wrap the resized image in a network input blob (per-channel mean
# subtraction, R/B channels swapped, no cropping) and run one forward
# pass to fetch the two requested output layers
blob = cv2.dnn.blobFromImage(
    image,
    1.0,
    (W, H),
    (123.68, 116.78, 103.94),
    swapRB=True,
    crop=False,
)
net.setInput(blob)
scores, geometry = net.forward(layerNames)

# turn the raw maps into candidate boxes, then let non-maxima suppression
# discard the weak, overlapping ones
rects, confidences = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)

- Convert ROI image into string

In [7]:
# fraction of the box width/height to add around each box before OCR
padding = 0.00
results = []

# Tesseract options are identical for every box, so build them once:
#   -l guj   the language to recognise (Gujarati)
#   --oem 1  OCR engine mode 1, the LSTM neural-net engine
#   --psm 7  page segmentation mode 7, treat the ROI as a single text line
config = ("-l guj --oem 1 --psm 7")

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates back to the original image using
    # the resize ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)
    # padding deltas in the x and y directions
    dX = int((endX - startX) * padding)
    dY = int((endY - startY) * padding)
    # apply the padding and clamp every coordinate into the image; the end
    # coordinates are clamped at 0 as well because a negative slice bound
    # would wrap around and select pixels from the opposite edge
    # NOTE(review): the end side is padded by 2*d while the start side gets
    # d -- kept as-is, confirm whether the asymmetry is intended
    startX = max(0, startX - dX)
    startY = max(0, startY - dY)
    endX = max(0, min(origW, endX + (dX * 2)))
    endY = max(0, min(origH, endY + (dY * 2)))
    # extract the actual padded ROI
    roi = orig[startY:endY, startX:endX]
    # a box that lies outside the image (or has zero area) leaves nothing
    # to recognise; skip it instead of handing Tesseract an empty image
    if roi.size == 0:
        continue
    text = pytesseract.image_to_string(roi, config=config)
    # add the bounding box coordinates and OCR'd text to the list
    # of results
    results.append(((startX, startY, endX, endY), text))

In [8]:
# bare expression: as the last line of a notebook cell this displays the
# list of ((startX, startY, endX, endY), text) tuples built by the OCR loop
results

[((12, 257, 41, 275), 'નિર્ણય\n\x0c'),
 ((10, 184, 106, 201), "“વિજ્ઞાપનદાતાની'\n\x0c"),
 ((140, 89, 197, 107), 'કસદ%થિ ત્રમાં\n\x0c'),
 ((11, 91, 58, 107), 'વાચકોને\n\x0c'),
 ((49, 60, 167, 82), 'ડીસ્કલેમર સ્પષ્ટતા\n\x0c'),
 ((45, 258, 79, 275), 'લેવાનો\n\x0c'),
 ((11, 108, 45, 126), 'પ્રપિદ્ધ\n\x0c'),
 ((65, 89, 123, 107), '૪ઝણાવવાને\n\x0c'),
 ((61, 127, 103, 146), 'માહિતી\n\x0c'),
 ((12, 240, 39, 258), 'તપાર\n\x0c'),
 ((80, 107, 126, 126), 'કોઈપણ\n\x0c'),
 ((108, 239, 168, 256), 'જવાબદારી\n\x0c'),
 ((124, 275, 195, 292), '“વ્યવસ્થાપક\n\x0c'),
 ((83, 144, 159, 164), 'વિજ્ઞાપનદાતા\n\x0c'),
 ((109, 219, 150, 239), 'પોતાની\n\x0c'),
 ((61, 20, 189, 54), '૩૬૦ગિગ્ર\n\x0c'),
 ((97, 127, 142, 146), 'સથવા\n\x0c'),
 ((154, 201, 196, 219), "'વાબદારી\n\x0c"),
 ((70, 240, 107, 258), 'પોતાની\n\x0c'),
 ((142, 126, 185, 145), 'ઉત્પાદન\n\x0c'),
 ((137, 108, 201, 127), 'હેરખબારમાં\n\x0c'),
 ((158, 165, 196, 181), '1કાદારી\n\x0c'),
 ((45, 240, 72, 258), 'કરીને\n\x0c'),
 ((79, 259, 111, 275), 'રહેશે,\n\

- Inspecting result

In [17]:
# sort the results bounding box coordinates from top to bottom
results = sorted(results, key=lambda r: r[0][1])

# the font is the same for every box, so load it once up front; a TrueType
# font is drawn through PIL so the OCR'd text can be rendered as-is
# (presumably this font file covers the Gujarati glyphs -- confirm)
fontpath = 'arial-unicode-ms.ttf'
font = ImageFont.truetype(fontpath, 32)

try:
    # loop over the results
    # NOTE(review): the [:-1] slice drops the bottom-most box after sorting;
    # kept as in the original -- confirm that this is intended
    for ((startX, startY, endX, endY), text) in results[:-1]:
        # display the text OCR'd by Tesseract
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # drop the surrounding whitespace / control characters, then draw
        # the bounding box on a fresh copy of the original image
        text = text.strip()
        output = orig.copy()
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 2)

        # render the text with PIL; the label goes 50 px above the box,
        # clamped to the top edge so boxes near the top of the image still
        # get a visible label instead of one drawn off-canvas
        img_pil = Image.fromarray(output)
        draw = ImageDraw.Draw(img_pil)
        draw.text((startX, max(0, startY - 50)), text, font=font,
                  fill=(0, 0, 255, 255))
        output = np.array(img_pil)

        # show the output image; press 'q' to stop, any other key to
        # advance to the next box
        output = cv2.resize(output, (500, 600))
        cv2.imshow("Text Detection", output)
        if cv2.waitKey(0) & 0xFF == ord('q'):
            break
finally:
    # close the preview window even if drawing or display raised
    cv2.destroyAllWindows()

OCR TEXT
૩૬૦ગિગ્ર


OCR TEXT
ડીસ્કલેમર સ્પષ્ટતા


OCR TEXT
કસદ%થિ ત્રમાં


OCR TEXT
૪ઝણાવવાને


OCR TEXT
વાચકોને


OCR TEXT
કોઈપણ


OCR TEXT
પ્રપિદ્ધ


OCR TEXT
હેરખબારમાં


OCR TEXT
થયેલ


OCR TEXT
ઉત્પાદન


OCR TEXT
માહિતી


OCR TEXT
સથવા


OCR TEXT
જદાવેલ


OCR TEXT
વિજ્ઞાપનદાતા


OCR TEXT
કરેલ


OCR TEXT
સેવા


OCR TEXT
સંપર્ણ


