In [1]:
import cv2
import numpy as np
from statistics import mean
import pytesseract
#from pytesseract import Output

from imutils.object_detection import non_max_suppression

In [2]:
# Read pictures
imagePath = 'Tests/Capture.png'
img = cv2.imread(imagePath)

In [3]:
# Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
vis = img.copy()
orig = img.copy()

# Calling MSER algorithm
mser = cv2.MSER_create(_delta=5, _min_area=10, _max_area=100, _max_variation=0.1)
regions, bboxes = mser.detectRegions(gray)  # Get the text area


In [4]:
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]  # Drawing text areas
area_pixels = [len(a) for a in regions]
hull_areas = [cv2.contourArea(a) for a in hulls]
cv2.polylines(img, hulls, 1, (0, 255, 0));

#areas = [cv2.contourArea(a) for a in hulls]

In [5]:
# Processing irregular detection boxes into rectangular boxes
rect_list = []

for contour in hulls:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(vis, (x, y), (x + w, y + h), (255, 255, 0), 1)
    area = cv2.contourArea(contour)
    rect_list.append([x, y, x + w, y + h]) 

cv2.imshow("hulls", vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
#cv2.imwrite("pid3.png", vis)

In [6]:
def region_props(rect_array, region_areas, contour_area_array):
    """
    Calculates region properties: aspect_ratio, rectangular area, solidity, extent.
    
    rect_array: numpy array of rectangular coordinates of form (x1, y1, y1, y2)
    region_area_array: numpy array of region array.  Area being number of pixels in blob 
        returned by algorithm like MSER
    contour_area_array: numpy array of contour areas.  e.g.cv2.controurArea(hulls)
    
    Returns: numpy areas for aspect_ratio, rectangular area, solidity, extent
    
         ------> X-COORD
        |
        |
        |
        V
        Y-COORD
    
    """
    x1 = rect_array[:,0]
    y1 = rect_array[:,1]
    x2 = rect_array[:,2]
    y2 = rect_array[:,3]

    # Calculate region props
    aspect_ratio = (x2 - x1) / (y2 - y1)
    rect_areas = (x2 - x1) * (y2 - y1)
    solidity = region_areas/(contour_area_array + 1e-10)
    extent = (region_areas)/rect_areas 
    
    return aspect_ratio, rect_areas, solidity, extent

In [7]:
def props_filter(rect_array, aspect_ratio, solidity, extent):
    """
    TODO: Make arguments optional, Pass filter settings (thresholds) to function 
    Applies a filter to rect_array removing areas that do not meet criteria.  
    Often used with region_props function -> see region_props for property defintions
    
    rect_array: numpy array of rectangular coordinates of form (x1, y1, y1, y2)
    aspect ratio: aspect ratio array corresponding to rectangles in rect_array
    extent: extent array corresponding to rectangles in rect_array
    solidity: solidity array corresponding to rectangles in rect_array
    
    Returns: filtered numpy array rect_array
    """
    
    # Apply filters based on region properties
    aspect_filter = aspect_ratio > 3
    extent_filter = (extent < 0.2) | (extent > 0.9)
    solidity_filter = solidity < 0.3

    rect_filter = rect_array[~aspect_filter & ~extent_filter & ~solidity_filter]
    
    return rect_filter

In [8]:
rect_array = np.array(rect_list)
hull_area_array = np.array(hull_areas)

aspect_ratio, _, solidity, extent = region_props(rect_array, area_pixels, hull_area_array)

rect_filter = props_filter(rect_array, aspect_ratio, solidity, extent)

In [9]:
# Creating filter on non text entities
for rect in rect_filter:
    x1, y1, x2, y2 = rect[0], rect[1], rect[2], rect[3]
    cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    
cv2.imshow("filter", vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite("pid4.png", vis)

True

In [10]:
idxs = np.argsort(rect_filter[:,0])
sorted_rects = rect_filter[idxs]

In [11]:
def horz_rect_merge(sorted_rects, x_threshold=20, h_fraction=0.5):
    """ 
    Merges rectangle ROI's along in the horizontal axis

    sorted_rects: rectangle numpy array sorted from top left corner to bottom right. 
        coordinates of (x1, y1, x2, y2)
    x_threshold: how close in pixels the rects have to be to be merged.
    h_fraction: fraction of box height that is allowable for rects to be merged in vertical
        must be between 0 < h_fraction <= 1 (can technically be > 1)

    Returns accepted_rects - list of merged rectangular coordinates
    of form x1, y1, x2, y2

    ------> X-COORD
    |
    |
    |
    V
    Y-COORD
        
    """

    # Grab the coordinates of the bounding boxes
    x1 = sorted_rects[:,0]
    y1 = sorted_rects[:,1]
    x2 = sorted_rects[:,2]
    y2 = sorted_rects[:,3]
    y_center = (y1 + y2) / 2
    
    rects_used = np.full((sorted_rects.shape[0]), False)

    # Array of accepted rects
    accepted_rects = []

    # Iterate all initial bounding rects
    for idx, box in enumerate(sorted_rects):
        if (rects_used[idx] == False) and (idx < sorted_rects.shape[0]-1):

            # Initialize current rect
            curr_x_min = box[0] 
            curr_x_max = box[2]
            curr_y_min = box[1]
            curr_y_max = box[3]
            curr_y_center = (curr_y_max + curr_y_min) / 2

            # This bounding rect is used
            rects_used[idx] = True

            # Find the largest (x, y) coordinates for the start of the bounding box and 
            # the smallest (x, y) coordinates for the end of the bounding box
            x_min = sorted_rects[idx+1:,0]
            x_max = np.maximum(curr_x_max, x2[idx+1:])
            y_max = np.maximum(curr_y_max, y2[idx+1:])
            y_min = np.minimum(curr_y_min, y1[idx+1:])

            # Check if distance between current rect and merge candidates is small enough
            x_dist = x_min - curr_x_max #TODO UPDATE X_DIST ONCE MERGED...MAKE THIS INTO A FUNCTION AND RUN CONSECUTIVE TIMES
            #y_dist = abs(curr_y_min - sorted_rects[idx+1:,1]) 
            y_dist = abs(curr_y_center - y_center[idx+1:])
            y_threshold = (curr_y_max - curr_y_min) * h_fraction
            
            # TAKE THE DISTANCES < THRESH AND COMPUTE NEW BOUNDING BOX...
            if (x_max[(x_dist < x_threshold) & (y_dist < y_threshold)].size != 0):
                curr_x_max = np.max(x_max[(x_dist < x_threshold) & (y_dist < y_threshold)])
                curr_y_max = np.max(y_max[(x_dist < x_threshold) & (y_dist < y_threshold)])
                curr_y_min = np.min(y_min[(x_dist < x_threshold) & (y_dist < y_threshold)])

            # Merge candidates (bounding rect) are used
            rects_used[idx+1:] = np.where((x_dist < x_threshold) & (y_dist < y_threshold), True, rects_used[idx+1:])

            # No more merge candidates possible, accept current rect
            accepted_rects.append([curr_x_min, curr_y_min, curr_x_max, curr_y_max])

        else:
            continue

    return accepted_rects

In [12]:
#TODO: determine why rects are still overlapping.  Seems to be related to the h_fraction variable - how to tune
#TODO: Run tesseract or east detector

accepted_rects = horz_rect_merge(sorted_rects, 20, 0.5)
#accepted_rects = horz_rect_merge(np.array(accepted_rects), 15, 0.3)
#accepted_rects = horz_rect_merge(np.array(accepted_rects), 0, 1)

len(accepted_rects)

205

In [13]:
for (startX, startY, endX, endY) in accepted_rects:
    # draw the bounding box on the image
    cv2.rectangle(vis, (startX, startY), (endX, endY), (255, 0, 155), 2)
cv2.imshow("rects", vis)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [14]:
cv2.imwrite("pid3.png", vis)

True

In [None]:
#TODO: post filtering??
accepted_rects = np.array(accepted_rects)
aspect_ratio, _, solidity, extent = region_props(accepted_rects, area_pixels, hull_area_array)

rect_filter = props_filter(accepted_rects, aspect_ratio, solidity, extent)

In [38]:
# Adding custom options
custom_config = r'--oem 1 --psm 7'

padding = 5
results = []

for (startX, startY, endX, endY) in accepted_rects:
    # in order to obtain a better OCR of the text we can potentially
    # apply a bit of padding surrounding the bounding box -- here we
    # are computing the deltas in both the x and y directions
    dX = int((endX - startX) * padding)
    dY = int((endY - startY) * padding)
    
    # apply padding to each side of the bounding box, respectively
    startX = max(0, startX - dX)
    startY = max(0, startY - dY)
    #endX = min(origW, endX + (dX * 2))
    #endY = min(origH, endY + (dY * 2))
    endX = endX + (dX )
    endY = endY + (dY )
    
    # Get pixel data from original image
    roi = orig[startY:endY, startX:endX]
        
    text = pytesseract.image_to_string(roi, config=custom_config)
    
    # add the bounding box coordinates and OCR'd text to the list of results
    results.append(((startX, startY, endX, endY), text))

KeyboardInterrupt: 

In [39]:
results

[((0, 1890, 129, 2099), 'I'),
 ((0, 1930, 129, 2095), ''),
 ((0, 1968, 129, 2111), '[]'),
 ((0, 2014, 309, 2168), '| cee'),
 ((13, 1857, 112, 1967), ''),
 ((13, 1960, 112, 2081), ''),
 ((8, 2066, 118, 2143), ''),
 ((13, 1877, 112, 2086), ''),
 ((13, 1724, 112, 1955), '|'),
 ((13, 1856, 112, 1933), ''),
 ((13, 1858, 112, 1979), ''),
 ((13, 1902, 112, 2023), ''),
 ((0, 1859, 299, 2255), ': RE'),
 ((0, 1839, 725, 2301), '| —— —'),
 ((19, 1925, 107, 2134), 'i'),
 ((19, 2053, 107, 2207), ''),
 ((19, 1903, 107, 1969), ''),
 ((19, 1910, 107, 1976), ''),
 ((19, 2115, 107, 2170), '2'),
 ((19, 1773, 107, 1960), ''),
 ((19, 1811, 107, 1943), ''),
 ((19, 1885, 107, 1918), 'g |'),
 ((19, 1774, 107, 1851), '|'),
 ((19, 1787, 107, 1853), '‘|'),
 ((19, 1697, 107, 1928), '|'),
 ((19, 1968, 107, 2034), 'i'),
 ((0, 1893, 627, 2146), ': oe —'),
 ((0, 2003, 375, 2135), ''),
 ((0, 1933, 375, 2142), ''),
 ((0, 1947, 543, 2233), 'i : ae ea _'),
 ((0, 1957, 364, 2199), 'h : eae'),
 ((0, 1943, 617, 2075), ''),
