Imports

In [11]:
import numpy as np
import argparse
import time
import cv2

from imutils.object_detection import non_max_suppression

Argument Parser

In [13]:
# Configuration for the EAST text-detection pipeline, expressed as an
# argparse parser. parse_args(args=[]) parses an empty argument list, so
# inside the notebook every option simply takes its default value.
ap = argparse.ArgumentParser()

ap.add_argument("-i", "--image", type = str, help = "path to input image")
ap.add_argument("-east", "--east", type = str, help = "path to input EAST text detector")
ap.add_argument("-c", "--min-confidence", type = float, default = 0.5, help = "minimum probability required to inspect region")
ap.add_argument("-w", "--width", type = int, default = 320, help = "resized image width(multiple of 32)")
# The short flag used to be a bare "-" (a typo). "-h" is reserved by
# argparse for --help, so the height option is exposed as "-e" instead.
ap.add_argument("-e", "--height", type = int, default = 320, help = "resized image height(multiple of 32)")

# vars() turns the Namespace into a plain dict keyed by option name
# (e.g. args["min_confidence"], args["width"], args["height"]).
args = vars(ap.parse_args(args=[]))


Loading Input JPG and getting dimensions

In [14]:
# Read the input image, keep an untouched copy for drawing the final boxes,
# and record the image's height and width.
img = cv2.imread("tennis.jpg")
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of an
# AttributeError on img.copy() below.
if img is None:
    raise FileNotFoundError("could not read input image 'tennis.jpg'")
orig = img.copy()
(h, w) = img.shape[:2]

Setting new width and height and getting the resize ratios

In [15]:
# Target size for the network input, plus the scale factors that map
# coordinates in the resized image back onto the original image.
newW, newH = (args["width"], args["height"])
# Take the source size from `orig` rather than the globals h/w: those names
# are reassigned by later cells, so re-running this cell after them would
# compute the ratios from the wrong dimensions.
(origH, origW) = orig.shape[:2]
rW = origW / float(newW)
rH = origH / float(newH)

Resizing image and getting final dimensions

In [16]:
# Resize the image to the requested (width, height), then refresh h and w
# so they describe the resized image.
targetSize = (newW, newH)
img = cv2.resize(img, targetSize)
h, w = img.shape[:2]

Defining output layers

In [17]:
# Names of the two network output layers requested from the forward pass.
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",  # first: output sigmoid activation
    "feature_fusion/concat_3",        # second: output feature map
]

Loading and Feeding EAST Text Detector

In [18]:
# Load the serialized EAST model from disk.
print("[INFO] loading EAST Text Detector")
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# Build the network input blob from the resized image: scale factor 1.0,
# spatial size (w, h), the given per-channel mean values, R/B channels
# swapped, no cropping.
channelMeans = (123.68, 116.78, 103.94)
blob = cv2.dnn.blobFromImage(
    img,
    1.0,
    (w, h),
    channelMeans,
    swapRB = True,
    crop = False,
)

# Time setInput together with the forward pass that fetches both layers.
start = time.time()
net.setInput(blob)
scores, geo = net.forward(layerNames)
end = time.time()

elapsed = end - start
print("[INFO] text detection took {:.6f} seconds".format(elapsed))

[INFO] loading EAST Text Detector
[INFO] text detection took 0.348351 seconds


Grabbing scores dimensions and initializing boxes/scores

In [19]:
# Row and column counts of the score volume (its third and fourth axes).
(numR, numC) = scores.shape[2:4]

# Accumulators for the candidate boxes and their matching scores.
rects, confidences = [], []

Looping over boxes

In [20]:
# Decode the raw network output into candidate boxes (rects) and their
# scores (confidences).
# Start from empty lists so that re-running this cell does not append a
# second copy of the previous run's detections.
rects = []
confidences = []
minConfidence = args["min_confidence"]

for y in range(0, numR):
    # Row y of the score map and of each of the five geometry channels.
    scoresData = scores[0, 0, y]
    x0Data = geo[0, 0, y]
    x1Data = geo[0, 1, y]
    x2Data = geo[0, 2, y]
    x3Data = geo[0, 3, y]
    angleData = geo[0, 4, y]

    for x in range(0, numC):
        # Skip cells whose score is below the configured threshold.
        if scoresData[x] < minConfidence:
            continue

        # NOTE(review): the 4.0 factor presumably reflects the output maps
        # being 4x smaller than the network input — confirm.
        (offsetX, offsetY) = (4.0 * x, 4.0 * y)

        angle = angleData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)

        # Box height and width from the summed geometry channels. Named
        # boxH/boxW (not h/w) so the image-dimension globals h and w, which
        # the blob-building cell reads, are not overwritten.
        boxH = x0Data[x] + x2Data[x]
        boxW = x1Data[x] + x3Data[x]

        # One corner is derived from the offset and the rotation terms; the
        # opposite corner follows by subtracting the box size.
        endX = int(offsetX + (cos * x1Data[x]) + (sin * x2Data[x]))
        endY = int(offsetY - (sin * x1Data[x]) + (cos * x2Data[x]))

        startX = int(endX - boxW)
        startY = int(endY - boxH)

        # list.append takes a single argument, so the box goes in as one tuple.
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

TypeError: list.append() takes exactly one argument (4 given)

Applying Non-Maximal Suppression

In [None]:
# Collapse overlapping candidate boxes, passing the scores as probabilities.
boxes = non_max_suppression(np.array(rects), probs = confidences)

for (startX, startY, endX, endY) in boxes:
    # Scale the box from resized-image coordinates back to the original
    # image using the width/height ratios computed earlier.
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # Draw a green rectangle, 2 px thick, on the original image.
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

cv2.imshow("Text Detection", orig)
cv2.waitKey(0)
# Tear the window down once a key has been pressed. The notebook kernel
# keeps running after this cell, so without this call the window would
# linger, unresponsive, until the kernel is restarted.
cv2.destroyAllWindows()