In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

import matplotlib

plt.rcParams["figure.figsize"] = (10, 7)
plt.rcParams['figure.subplot.left'] = 0.1
plt.rcParams['figure.subplot.right'] = 0.99
plt.rcParams['figure.subplot.top'] = 0.97
plt.rcParams['figure.subplot.bottom'] = 0.05
plt.rcParams['figure.subplot.hspace'] = 0.3
matplotlib.rc("image", cmap="gray_r")

In [2]:
%matplotlib qt

In [3]:
real_image= cv2.imread(
r"D:\WORKS\TechTasks\DocData\2.jpg"
)
plt.imshow(real_image)

<matplotlib.image.AxesImage at 0x19764fef6d0>

# Prepare

In [4]:
gray_image = cv2.cvtColor(real_image, cv2.COLOR_BGR2GRAY)
gray_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
plt.imshow(gray_image)

<matplotlib.image.AxesImage at 0x197680d4970>

In [5]:
def downscale_image(image, max_size=2048):
    """Shrink im until its longest dimension is <= max_dim.

    Returns new_image, scale (where scale <= 1).
    """

    x, y = image.shape
    max_dimention = max(x, y)
    if max_dimention <= max_size:
        return image

    scale = max_size / max_dimention
    return cv2.resize(
        image, None, fx= scale, fy= scale, interpolation=cv2.INTER_LINEAR
    )


gray_image = downscale_image(gray_image)
plt.imshow(gray_image)

<matplotlib.image.AxesImage at 0x19765d7b9d0>

# Find text boxes

In [6]:
from scipy.ndimage import rank_filter
from typing import NamedTuple

In [7]:
class BlockTuple(NamedTuple):
    i: int
    power: int
    mask: np.ndarray


a = gray_image
a = cv2.GaussianBlur(a, (5, 5), 5)
a = cv2.Canny(a, 100, 200)
a = rank_filter(a, rank=5, size=3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
n_rectangles = 1000
while n_rectangles > 20:
    a = cv2.dilate(a, kernel, iterations=1)
    n_rectangles, block_map = cv2.connectedComponents(a)
    n_rectangles -= 1

In [10]:
ranged_blocks: list[BlockTuple] = sorted(
    [
        BlockTuple(
            i,
            np.count_nonzero((block_map == i) * 1),
            np.uint8(block_map == i),
        )
        for i in range(1, n_rectangles)
    ],
    key=lambda x: x.power,
)
print(n_rectangles - 1)
# a = cv2.dilate(a, kernel, iterations=5)
plt.imshow(block_map)

15


<matplotlib.image.AxesImage at 0x1977b099fc0>

In [11]:
def eye_metric(mask, image):
    non_zero_coords = cv2.findNonZero(mask)
    x, y, dx, dy = cv2.boundingRect(non_zero_coords)

    central_slice = slice(y, y + dy), slice(x, x + dx)
    central = np.zeros_like(image)
    central[central_slice] = image[central_slice]
    central[central != 0] = 1

    y_full, x_full = image.shape
    y_border = y_full // 20
    x_border = x_full // 20
    periphery_slice = (
        slice(y - y_border, y + y_border + dy),
        slice(x - x_border, x + x_border + dx),
    )
    periphery = np.zeros_like(image)
    periphery[periphery_slice] = image[periphery_slice]
    periphery[periphery != 0] = 1

    tp = np.count_nonzero(central)
    fp = dx * dy - tp
    fn = np.count_nonzero(periphery - central)

    return 2 * tp / (2 * tp + fp + fn), central, periphery

In [196]:
paragraphs = []
while len(ranged_blocks) > 4:
    central = ranged_blocks.pop()
    current = central.mask.copy()

    watched_blocks = [block for block in ranged_blocks if block.i in set(np.unique(bord))]
    periphery = current + sum([block.mask for block in watched_blocks])
    m0, img, bord = eye_metric(current, block_map)

    while len(watched_blocks) > 0:
        add_block = watched_blocks.pop()
        m, img, bord = eye_metric(current + add_block.mask, block_map)
        if m >= m0 * 0.9:
            m0 = m
            current += add_block.mask
            ranged_blocks.remove(add_block)

    paragraphs.append(current)

print(m)
plt.imshow(current)

0.16518012221792758


<matplotlib.image.AxesImage at 0x1942489f4f0>

In [None]:
current
m, img, bord = eye_metric(current + add_block.mask, block_map)
plt.imshow(img * 10 + bord)

In [197]:
plt.imshow(sum([i*np.count_nonzero(i) for i in paragraphs]))

<matplotlib.image.AxesImage at 0x1941f5f1210>

In [193]:
plt.imshow(real_image)

<matplotlib.image.AxesImage at 0x194208b2320>

In [None]:
def get_min_area_rect(image):
    non_zero_coords = cv2.findNonZero(image)
    return cv2.minAreaRect(non_zero_coords)
