In [2]:
import cv2
import numpy as np
import imutils

import os
from matplotlib import pyplot as plt
import typing as t
import math

Working folders and image loading

In [3]:
# Root folder of the experiment; the paths below are built from it and are '/' terminated.
working_p = "./"
in_p = working_p + "in/"    # folder the input images are listed/read from
out_p = working_p + "out/"  # folder prefix used by save_img() for its output names

# order number used for giving name to output images
ordn = 0

# os.listdir() returns the entries in arbitrary order; sort them so that the
# processing order (and therefore the numbering of the saved files) is the
# same on every run and on every machine.
files = sorted(os.listdir(in_p))

imgs = []

Utility functions

In [4]:
def save_img(img, file: str, s) -> str:
    """
    Saves the image with the formatted name:
        <out_path><file_name>-<ordn>-<suffix>[-<suffix>...].<file_ext>
    Note: <out_path> is the global variable `out_p` ('/' terminated)
    Note: <ordn> is the global counter `ordn`; it is incremented on every call
    Note: when `file` has no extension, "png" is used

    @param img: the image to save
    @param file: the name of the file to save
    @param s: the suffix to add to the name; either a single value (converted
              with str()) or a list of values that are joined with '-'

    @return: the name of the file saved
    """
    global ordn

    # split "<file_name>.<file_ext>" on the LAST dot only
    if "." in file:
        file_name, _, file_ext = file.rpartition(".")
    else:
        file_name, file_ext = file, "png"

    # normalise s to a list of strings; converting every element with str()
    # keeps "-".join() from failing on lists that hold numbers
    if type(s) is list:
        suffix = [str(part) for part in s]
    else:
        suffix = [str(s)]

    file_out = out_p + file_name + "-" + str(ordn) + "-" + "-".join(suffix) + "." + file_ext
    ordn += 1

    # make sure the destination folder exists before writing into it
    # (cv2.imwrite is not expected to create missing folders)
    os.makedirs(os.path.dirname(file_out) or ".", exist_ok=True)
    cv2.imwrite(file_out, img)

    return file_out


Helper functions for bitmap detection

In [5]:
def do_fft(img):
    """
    It takes in a square gray image and processes it to get the frequency domain image.

    @param img: a square gray image

    @return: a tuple of the frequency domain image (zero frequency shifted to
             the center) and its magnitude spectrum, 20 * log(|F|)
    """
    fshift = np.fft.fftshift(np.fft.fft2(img))

    # A frequency bin that is exactly zero (e.g. every non-DC bin of a flat
    # image) would give log(0) = -inf and a RuntimeWarning. Clamp the magnitude
    # to the smallest positive normal float so the spectrum stays finite while
    # every non-zero bin keeps its exact value.
    magnitude = np.abs(fshift)
    floor = np.finfo(magnitude.dtype).tiny
    magnitude_spectrum = 20 * np.log(np.maximum(magnitude, floor))

    return (fshift, magnitude_spectrum)

def do_ifft(fshift):
    """
    Inverse of the shifted FFT.

    @param fshift: a frequency domain image with the zero frequency in the center

    @return: the magnitude of the inverse 2D FFT of fshift
    """
    # undo the centering shift, transform back, keep only the magnitude
    return np.abs(np.fft.ifft2(np.fft.ifftshift(fshift)))

def do_keep_disk(fshift, sr, br):
    """
    Multiplies fshift (frequency domain image) with a 0/1 ring mask centered in
    the image, so only the frequencies lying on the ring are kept.

    @param fshift: the frequency domain image (of square shape)
    @param sr: the small radius of the ring, as a percentage of the number of rows
    @param br: the big radius of the ring, as a percentage of the number of rows

    @return: the frequency domain image with only the ring part kept
    """
    n_rows, n_cols = fshift.shape[0], fshift.shape[1]
    center_xy = (int(n_cols / 2), int(n_rows / 2))  # cv2 points are (x, y)

    outer_r = int(n_rows * br / 100)
    inner_r = int(n_rows * sr / 100)

    # The ring is drawn as one thick circle: its radius lies halfway between
    # the two radii and its line thickness equals their distance.
    ring_width = abs(outer_r - inner_r)
    mid_r = abs(int(outer_r - (outer_r - inner_r) / 2))

    ring_mask = np.zeros((n_rows, n_cols), np.uint8)
    cv2.circle(ring_mask, center_xy, mid_r, color=1, thickness=ring_width)

    return fshift * ring_mask

def get_bitmap_of_possible_text(img, sr: int, br: int, scale: int, file = "file.png", DEBUG: bool = False):
    """
    It takes in a square gray image and processes it to hopefully remain with a bitmap of where text is present.
    The processing steps are:
        1. FFT of the image (do_fft)
        2. keep only the ring of frequencies between sr and br (do_keep_disk)
        3. multiply the kept frequencies by scale
        4. inverse FFT (do_ifft), then conversion with cv2.convertScaleAbs
        5. black-hat morphology with a 3x3 kernel
        6. OTSU global thresholding

    @param img: a square gray image
    @param sr: the small radius of the disk to keep (in percent, maximum is 50% of the image size)
    @param br: the big radius of the disk to keep (in percent, maximum is 50% of the image size)
    @param scale: how much to scale the remaining frequencies after the disk is kept
    @param file: the file name used for the debug images and the debug print
    @param DEBUG: when True, the intermediate images are saved with save_img and
                  the OTSU threshold is printed

    @return: a bitmap of where text is present
    """

    (fshift, magnitude_spectrum_original) = do_fft(img)

    # keep only disk from fshift
    fshift = do_keep_disk(fshift, sr, br)

    # increase the frequencies
    fshift *= scale

    # do inverse fft with the filtered frequencies
    img_filtered = do_ifft(fshift)
    img_filtered = cv2.convertScaleAbs(img_filtered)

    # use blackhat
    img_filtered = cv2.morphologyEx(img_filtered, cv2.MORPH_BLACKHAT, np.ones((3,3), np.uint8))

    # use OTSU global thresholding
    thresh, img_otsu = cv2.threshold(img_filtered, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    if DEBUG:
        save_img(magnitude_spectrum_original, file, "magnitude_spectrum-original")
        # Everything outside the kept ring was multiplied by 0, so the magnitude
        # is clamped to the smallest positive float before the log; otherwise
        # log(0) gives -inf and a RuntimeWarning.
        kept_magnitude = np.abs(fshift)
        kept_magnitude = np.maximum(kept_magnitude, np.finfo(kept_magnitude.dtype).tiny)
        save_img(20*np.log(kept_magnitude), file, "magnitude_spectrum-modified")
        save_img(img_filtered, file, "img_filtered")
        print(file + " OTSU T: " + str(thresh))
        save_img(img_otsu, file, "img_otsu")

    return img_otsu

def _is_noise(h, w, shape) -> bool:
    bh, bw = shape
    
    if (h < 10 or w < 10):
        return True
    elif h / w > 1.5:
        return True
    elif h / w < 0.33:
        return True
    elif  (h > bh * 0.33) and (w > bw * 0.66):
        return True
    return False

def _is_not_text(h, w, shape) -> bool:
    bh, bw = shape

    if h < 26 or w < 10:
        return True
    elif h / w > 1.5:
        return True
    elif h / w < 0.33:
        return True
    elif  (h > bh * 0.33) and (w > bw * 0.66):
        return True
    
    return False

def filter_for_text(bitmap, file = "file.png", DEBUG: bool = False):
    """
    Removes from a bitmap the regions that do not look like text.

    The steps are:
        1. dilate once with a 3x3 kernel and erase the bounding box of every
           contour that _is_noise flags
        2. dilate 4 more iterations and erase the bounding box of every contour that
             - has less than 40% of its pixels equal to 255, or
             - is flagged by _is_not_text, or
             - is taller than 40% of the bitmap height while being wider than tall, or
             - is taller than 70% of the bitmap height

    @param bitmap: the bitmap to filter (converted with COLOR_GRAY2BGR for the
                   debug images, so a single channel image is assumed)
    @param file: the file name used for the debug images
    @param DEBUG: when True, the intermediate images are saved with save_img

    @return: the filtered bitmap
    """
    kernel = np.ones((3,3), np.uint8)
    result = bitmap

    # dilate
    result = cv2.dilate(result, kernel, iterations=1)

    cnts = cv2.findContours(result.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # color copy used only for the "first_boxes" debug image
    ci_first_boxes = cv2.cvtColor(result, cv2.COLOR_GRAY2BGR)

    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        # draw bounding rectangle
        cv2.rectangle(ci_first_boxes, (x, y), (x + w, y + h), (0, 255, 0), 1)

        # first pass: erase the whole bounding box of the boxes considered noise
        if _is_noise(h, w, bitmap.shape):
            result[y:y+h, x:x+w] = 0

    # dilate once more
    result = cv2.dilate(result, kernel, iterations=4)

    # color copy used only for the "second_boxes" debug image
    ci_second_boxes = cv2.cvtColor(result, cv2.COLOR_GRAY2BGR)

    cnts = cv2.findContours(result.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        # draw bounding rectangle
        cv2.rectangle(ci_second_boxes, (x, y), (x + w, y + h), (0, 255, 0), 1)
        
        # remove big boxes that have a lot of black pixels
        # NOTE: the percentage is read from `result`, which this loop is also
        # erasing, so a box overlapping an already erased box sees the erased pixels
        white_percentage = np.sum(result[y:y+h, x:x+w] == 255) / (h * w)
        if (white_percentage < 0.40):
            result[y:y+h, x:x+w] = 0
        elif _is_not_text(h, w, bitmap.shape):
            result[y:y+h, x:x+w] = 0
        # tall box (over 40% of the bitmap height) that is wider than tall
        elif (h > bitmap.shape[0] * 0.4) and (h / w < 1.0):
            result[y:y+h, x:x+w] = 0
        # box taller than 70% of the bitmap height
        elif (h > bitmap.shape[0] * 0.7):
            result[y:y+h, x:x+w] = 0

    # put colorful bounding boxes on the image
    # (after_filter is used only for the "boxes_after_filters" debug image)
    cnts = cv2.findContours(result.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    after_filter = cv2.cvtColor(result, cv2.COLOR_GRAY2BGR)
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        # draw bounding rectangle
        cv2.rectangle(after_filter, (x, y), (x + w, y + h), (0, 255, 0), 2)


    if DEBUG:
        save_img(ci_first_boxes, file, "first_boxes")
        save_img(ci_second_boxes, file, "second_boxes")
        save_img(after_filter, file, "boxes_after_filters")
        save_img(result, file, "filtered_result")

    return result

def get_bitmap(gray, file="file.png", DEBUG: bool = False):
    """
    Runs the two stages of the bitmap detection on a gray image: the frequency
    based extraction (get_bitmap_of_possible_text with sr=7, br=18, scale=12)
    followed by the box based clean-up (filter_for_text).

    @param gray: a gray image
    @param file: the file name passed to both stages for their debug output
    @param DEBUG: passed to both stages

    @return: a bitmap of where text is present
    """
    return filter_for_text(
        get_bitmap_of_possible_text(gray, sr=7, br=18, scale=12, file=file, DEBUG=DEBUG),
        file=file,
        DEBUG=DEBUG,
    )


Helper functions for image segmentation

In [6]:
def get_kernel(shape, lines, collumns) -> t.Tuple[int, int]:
    """
    Computes the square kernel obtained when an image of the given shape is
    split in <lines> lines and <collumns> columns.

    @param shape: the (height, width) of the image to be split
    @param lines: the number of lines to split in
    @param collumns: the number of columns to split in

    @return: a square kernel (side, side), where side is the smaller of the
             line height and the column width
    """
    height, width = shape

    # the smaller cell dimension wins so that the kernel stays square
    side = min(int(height / lines), int(width / collumns))

    return (side, side)

def get_corners_of_segments(shape, kernel, overlap):
    """
    It splits the image in smaller segments of size kernel that overlap by a percentage of overlap.
    Note: the last segment on a line and column will probably overlap more, because it is
          moved back so that it ends exactly on the image border
    Note: when the image is smaller than the kernel on an axis, a single segment
          starting at 0 is produced on that axis (never a negative corner)

    @param shape: the image's shape to be segmented; only the first two values
                  (height, width) are used
    @param kernel: a square; only kernel[0] (the side length) is used
    @param overlap: the percentage of overlap (example: 30%); must leave a
                    non-overlapping part of at least 1 pixel

    @return: a list of (row, column) corners where the segments will start,
             ordered line by line

    @raise ValueError: if the overlap leaves no non-overlapping part (e.g. 100%)
    """
    kl = kernel[0]  # kernel length
    overlap_size = int(kl * overlap / 100)
    step = kl - overlap_size  # non-overlap size, the distance between two corners

    if step <= 0:
        raise ValueError(
            "overlap of " + str(overlap) + "% leaves no step between segments of size " + str(kl)
        )

    height, width = shape[:2]

    def _starts(length):
        # at least one segment, even when the image is smaller than the kernel
        count = max(1, math.ceil((length - overlap_size) / step))

        starts = []
        for i in range(count):
            start = int(i * step)
            end = int(start + kl)
            # keep the segment in bounds even if for the last one we increase the overlap
            if end > length:
                start -= end - length
            # a kernel bigger than the image must not produce a negative corner:
            # a negative index would select the wrong part of the image when slicing
            starts.append(max(0, start))
        return starts

    row_starts = _starts(height)
    col_starts = _starts(width)

    return [(row, col) for row in row_starts for col in col_starts]

def create_segments(img, kernel, corners, file="file.png", DEBUG: bool = False):
    """
    Cuts one kernel sized segment out of the image for every corner in the list.

    @param img: the image to be segmented
    @param kernel: the (height, width) of a segment
    @param corners: a list of (row, column) corners where the segments start
    @param file: the file name used for the debug images
    @param DEBUG: when True, every segment is saved with save_img

    @return: a list of (segment image, corner) tuples, in the order of corners
    """
    segments = [
        (img[c[0]:c[0] + kernel[0], c[1]:c[1] + kernel[1]], c)
        for c in corners
    ]

    if DEBUG:
        for idx, (segment, _) in enumerate(segments):
            save_img(segment, file, "segment_" + str(idx))

    return segments

def get_segments_k(img, kernel_size, overlap, file="file.png", DEBUG: bool = False):
    """
    Splits the image in square segments with a side of kernel_size that overlap
    by a percentage of overlap (see _get_segments).

    @param img: the image to be segmented
    @param kernel_size: the side of the square kernel
    @param overlap: the percentage of overlap (example: 30%)
    @param file: the file name used for prints and debug images
    @param DEBUG: passed on to _get_segments

    @return: whatever _get_segments returns for the square kernel
    """
    return _get_segments(img, (kernel_size, kernel_size), overlap, file, DEBUG)

def get_segments_ck(img, lines, collumns, overlap, file="file.png", DEBUG: bool = False):
    """
    Derives the kernel from the requested number of lines and columns
    (get_kernel) and splits the image in segments of that size that overlap by
    a percentage of overlap (see _get_segments).

    @param img: the image to be segmented
    @param lines: the number of lines to be split in
    @param collumns: the number of columns to be split in
    @param overlap: the percentage of overlap (example: 30%)
    @param file: the file name used for prints and debug images
    @param DEBUG: passed on to _get_segments

    @return: whatever _get_segments returns for the computed kernel
    """
    computed_kernel = get_kernel(img.shape, lines, collumns)
    return _get_segments(img, computed_kernel, overlap, file, DEBUG)

def _get_segments(img, kernel, overlap, file="file.png", DEBUG: bool = False):
    """
    Prints the kernel in use, computes the segment corners for the image shape
    and cuts the segments out of the image.

    @param img: the image to be segmented
    @param kernel: the kernel giving the segment size
    @param overlap: the percentage of overlap between segments
    @param file: the file name used for the print and for debug images
    @param DEBUG: passed on to create_segments

    @return: the list returned by create_segments
    """
    print(file + " -> " + str(kernel))

    return create_segments(
        img,
        kernel,
        get_corners_of_segments(img.shape, kernel, overlap=overlap),
        file=file,
        DEBUG=DEBUG,
    )

Voting process

In [7]:
def vote(map, bitmap, corner):
    """
    Adds one vote in the map for every pixel of every bounding box found in the bitmap.

    @param map: the map to vote in (modified in place)
    @param bitmap: the bitmap to find boxes in
    @param corner: the (row, column) position of the bitmap inside the map

    @return: the updated vote map
    """
    # external contours of the bitmap
    contours = imutils.grab_contours(
        cv2.findContours(bitmap.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # translate the box from bitmap coordinates to map coordinates
        top = corner[0] + y
        left = corner[1] + x
        map[top : top + h, left : left + w] += 1

    return map

def filter_votes(voting_map, file="file.png", DEBUG: bool = False):
    """
    Turns the voting map into a binary map of the accepted regions.

    The steps are:
        1. convert to uint8, set everything with less than 3 votes to 0 and the rest to 255
        2. erase the boxes lower than 40 or narrower than 10
        3. dilate with a 1x7 kernel (5 iterations), then close with a 1x37 kernel
        4. erase the boxes lower than 40 or narrower than 10, the boxes taller
           than 200, and the boxes taller than 100 whose height/width ratio is above 0.85

    @param voting_map: the map of votes
    @param file: the file name used for the debug images
    @param DEBUG: when True, the map is saved with save_img after every step

    @return: the filtered, binary voting map
    """
    min_votes: int = 3

    def _boxes(mask):
        # bounding boxes (x, y, w, h) of the external contours of mask
        found = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        return [cv2.boundingRect(contour) for contour in imutils.grab_contours(found)]

    # ensure all values are uint8
    votes = np.uint8(voting_map)
    snap_raw = votes.copy()

    # threshold, then make the map binary
    votes[votes < min_votes] = 0
    votes[votes > 0] = 255
    snap_threshold = votes.copy()

    # first pass: remove the boxes that are too small
    for (x, y, w, h) in _boxes(votes):
        if h < 40 or w < 10:
            votes[y : y + h, x : x + w] = 0
    snap_filter_1 = votes.copy()

    # merge neighbours horizontally: dilate, then close
    votes = cv2.dilate(votes, np.ones((1, 7), np.uint8), iterations=5)
    snap_dilate = votes.copy()

    votes = cv2.morphologyEx(votes, cv2.MORPH_CLOSE, np.ones((1, 37), np.uint8))
    snap_closing = votes.copy()

    # second pass: remove the small boxes, the tall boxes, and the taller
    # boxes with a bad height/width ratio
    for (x, y, w, h) in _boxes(votes):
        if (h < 40 or w < 10) or h > 200 or (h > 100 and h / w > 0.85):
            votes[y : y + h, x : x + w] = 0
    snap_filter_2 = votes.copy()

    if DEBUG:
        debug_images = [
            (cv2.equalizeHist(snap_raw), "voting_map"),
            (snap_threshold, "voting_map_after_threshold"),
            (snap_filter_1, "voting_map_after_filter_1"),
            (snap_dilate, "voting_map_after_dilate"),
            (snap_closing, "voting_map_after_closing"),
            (snap_filter_2, "voting_map_after_filter_2"),
        ]
        for image, label in debug_images:
            save_img(image, file, label)

    return votes

# folder ('/' terminated) where the cropped text regions are written
out_text_region = "./detected/"
# running number used as the file name of the next cropped region
out_text_file_order = 0
def save_detexted_text_region(img, contour):
    """
    Crops the bounding box of the contour out of the image and saves it as
    <out_text_region><nnnn>.png, where <nnnn> is the global counter
    `out_text_file_order` padded to 4 digits. The counter is incremented on
    every call.

    @param img: the image to crop from
    @param contour: the contour whose bounding box is saved
    """
    global out_text_file_order
    # get the bounding box of the contour
    x, y, w, h = cv2.boundingRect(contour)
    # format file name to 000n.png
    file_name = str(out_text_file_order).zfill(4)
    # make sure the destination folder exists before writing into it
    # (cv2.imwrite is not expected to create missing folders)
    os.makedirs(out_text_region, exist_ok=True)
    # save the img box in another image
    cv2.imwrite(out_text_region + file_name + ".png", img[y : y + h, x : x + w])
    out_text_file_order += 1


Main

In [9]:
# restart the numbering of the cropped text regions for this run
out_text_file_order = 0

for file in files:
    # restart the order number that save_img() puts in the output names
    ordn = 0

    img = cv2.imread(in_p + file, cv2.IMREAD_COLOR)

    # cv2.imread returns None (it does not raise) for a file it cannot decode;
    # skip such entries instead of failing later in cvtColor
    if img is None:
        print(file + " -> skipped (could not be read as an image)")
        continue

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    segments = get_segments_k(gray, kernel_size=400, overlap=75, file=file, DEBUG=False)

    voting_map = np.zeros(gray.shape)

    # only the second half of the segment list is processed; the corners are
    # generated line by line, so this is the lower part of the image
    for (s_img, corner) in segments[len(segments)//2:]:
        bitmap = get_bitmap(s_img, file=file, DEBUG=False)
        voting_map = vote(voting_map, bitmap, corner)
    # TO_DO: move voting map filtering from here

    voting_map = filter_votes(voting_map, file=file, DEBUG=False)

    # find connected components
    cnts = cv2.findContours(voting_map.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # save every detected region and draw its bounding box
    print("\t contours found:", len(cnts))
    iwb = img.copy()
    for c in cnts:
        # get the bounding box of the contour
        x, y, w, h = cv2.boundingRect(c)
        # the crop is taken from img, so it does not contain the drawn boxes
        save_detexted_text_region(img, c)
        iwb = cv2.rectangle(iwb, (x, y), (x + w, y + h), (0, 255, 0), 2)

    save_img(iwb, file, "img_with_boxes")

IMG_1286-1-5.JPG -> (400, 400)
	 contours found: 45
IMG_1287-2-5.JPG -> (400, 400)
	 contours found: 35
IMG_1288-3-5.JPG -> (400, 400)
	 contours found: 23
IMG_1289-4-5.JPG -> (400, 400)
	 contours found: 21
IMG_1290-1-5.JPG -> (400, 400)
	 contours found: 16
IMG_1291-2-5.JPG -> (400, 400)
	 contours found: 12
IMG_1292-3-5.JPG -> (400, 400)
	 contours found: 24
IMG_1293-4-5.JPG -> (400, 400)
	 contours found: 28
IMG_1294-1-5.JPG -> (400, 400)
	 contours found: 19
IMG_1295-2-5.JPG -> (400, 400)
	 contours found: 24
IMG_1296-3-5.JPG -> (400, 400)
	 contours found: 41
IMG_1297-4-5.JPG -> (400, 400)
	 contours found: 29
IMG_1298-1-5.JPG -> (400, 400)
	 contours found: 27
IMG_1299-2-5.JPG -> (400, 400)
	 contours found: 21
IMG_1300-3-5.JPG -> (400, 400)
	 contours found: 23
IMG_1301-4-5.JPG -> (400, 400)
	 contours found: 25
IMG_1302-1-5.JPG -> (400, 400)
	 contours found: 31
IMG_1303-2-5.JPG -> (400, 400)
	 contours found: 34
IMG_1304-3-5.JPG -> (400, 400)
	 contours found: 24
IMG_1305-4-5