In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

import matplotlib

# Notebook-wide matplotlib defaults: figure size and subplot margins/spacing.
plt.rcParams.update(
    {
        "figure.figsize": (10, 7),
        "figure.subplot.left": 0.1,
        "figure.subplot.right": 0.99,
        "figure.subplot.top": 0.97,
        "figure.subplot.bottom": 0.05,
        "figure.subplot.hspace": 0.3,
    }
)
# Default colormap for images: reversed grayscale.
matplotlib.rc("image", cmap="gray_r")

In [2]:
# Select matplotlib's Qt GUI backend for this kernel (figures are shown by the Qt GUI, not inline).
%matplotlib qt

In [8]:
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
image_path = r"D:\WORKS\TechTasks\DocData\2.jpg"
img_real = cv2.imread(image_path)
# cv2.imread does not raise for a missing or unreadable file; it returns None.
# Fail here with a clear message instead of later inside cvtColor/imshow.
if img_real is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads colour images in BGR channel order, matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img_real, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x1664f46aa10>

# Prepare

In [6]:
def downscale_image(image: np.ndarray, max_size=2048) -> np.ndarray:
    """
    Downscale an image so that its largest side does not exceed ``max_size``.

    The aspect ratio is preserved. An image that already fits is returned
    unchanged (the very same array object, not a copy).

    :param image: Input image, 2-D (grayscale) or 3-D (height, width, channels)
    :param max_size: Maximum allowed length of the largest side in pixels, defaults to 2048
    :return: Downscaled image, or ``image`` itself when no scaling is needed
    :raises ValueError: If ``max_size`` is not positive
    """
    if max_size <= 0:
        raise ValueError(f"max_size must be positive, got {max_size}")

    # Only the first two axes are spatial; slicing keeps colour images working.
    height, width = image.shape[:2]
    max_dimension = max(height, width)

    if max_dimension <= max_size:
        return image

    scale = max_size / max_dimension
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)

In [9]:
# Grayscale -> inverted binary image (threshold picked by Otsu's method) -> downscale.
gray_full = cv2.cvtColor(img_real, cv2.COLOR_BGR2GRAY)
otsu_level, binary_inv = cv2.threshold(
    gray_full,
    0,
    255,
    cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU,
)
img_gray = downscale_image(binary_inv)
plt.imshow(img_gray)

<matplotlib.image.AxesImage at 0x1664a716050>

# Find text boxes

In [210]:
from scipy.ndimage import rank_filter
from typing import NamedTuple


class BoxProperties(NamedTuple):
    """Axis-aligned box: top-left corner (x, y) and size (dx, dy), in pixels."""

    x: int
    y: int
    dx: int
    dy: int


class AttentionArea:
    """A focus mask plus a bordered window around it, used to look for neighbouring blocks."""

    # Border thickness as a fraction of the image size (applied separately per axis).
    border_scale = 0.05

    def __init__(self, attention_mask: np.ndarray, image: np.ndarray) -> None:
        """
        Create an attention area around ``attention_mask``.

        The window is the bounding box of the mask's non-zero pixels, grown by
        ``border_scale`` of the image size on every side and clamped to the
        image bounds.

        :param attention_mask: 2-D mask to look around; stored by reference (not copied)
        :param image: Full 2-D image, assumed to have the same shape as the mask
        :raises ValueError: If ``attention_mask`` has no non-zero pixels
        """
        y_full, x_full = image.shape
        y_border = int(y_full * self.border_scale)
        x_border = int(x_full * self.border_scale)

        x, y, dx, dy = self._bounding_box(attention_mask)

        # Clamp on all four sides so that the recorded window box matches the
        # pixels actually copied into window_mask, also near the image edges.
        x0 = max(x - x_border, 0)
        y0 = max(y - y_border, 0)
        x1 = min(x + dx + x_border, x_full)
        y1 = min(y + dy + y_border, y_full)

        window_slice = (slice(y0, y1), slice(x0, x1))
        window = np.zeros_like(image)
        window[window_slice] = image[window_slice]

        self.focus_mask = attention_mask
        self.window_mask = window

        self.focus_box = BoxProperties(x, y, dx, dy)
        self.window_box = BoxProperties(x0, y0, x1 - x0, y1 - y0)

    @staticmethod
    def _bounding_box(mask: np.ndarray) -> tuple[int, int, int, int]:
        """Return (x, y, dx, dy) of the tight box around the non-zero pixels of a 2-D mask."""
        rows = np.flatnonzero(mask.any(axis=1))
        cols = np.flatnonzero(mask.any(axis=0))
        if rows.size == 0:
            raise ValueError("attention_mask has no non-zero pixels")
        x, y = int(cols[0]), int(rows[0])
        # Inclusive extent, i.e. max - min + 1.
        return x, y, int(cols[-1]) - x + 1, int(rows[-1]) - y + 1

    @property
    def cropped_masks(self) -> tuple[np.ndarray, np.ndarray]:
        """Return copies of the focus mask and the window mask, both cropped to the window box."""

        window_slice = (
            slice(self.window_box.y, self.window_box.y + self.window_box.dy),
            slice(self.window_box.x, self.window_box.x + self.window_box.dx),
        )
        return self.focus_mask[window_slice].copy(), self.window_mask[window_slice].copy()

    @property
    def F1_metric(self) -> float:
        """
        F1-style score used to judge clustering quality.

        Computed on the cropped, binarised masks as ``2*tp / (2*tp + fp + fn)`` with
        ``tp`` = number of focus pixels, ``fp`` = total window area and
        ``fn`` = number of pixels where window and focus occupancy differ.

        NOTE(review): ``fp`` is the whole window area, not a count of mismatching
        pixels, so this is not a textbook F1 score. Confirm this is intended.
        """

        attention, window = self.cropped_masks
        attention = attention != 0
        window = window != 0

        dy, dx = window.shape

        tp = np.count_nonzero(attention)
        fp = dx * dy
        fn = np.count_nonzero(window != attention)
        return 2 * tp / (2 * tp + fp + fn)

    @property
    def periphery_types(self) -> set:
        """Values present in the window mask that do not occur in the focus mask."""
        return set(np.unique(self.window_mask)) - set(np.unique(self.focus_mask))

In [14]:
# def watch(attention_mask: np.ndarray, image: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
#     """
#     Get attention area around attention_mask

#     :param attention_mask: Used mask to look around
#     :param image: Full image
#     :return: Attention window and parametrs for it (what we see and where need to see)
#     """

#     border_scale = 0.05
#     y_full, x_full = image.shape
#     y_border = int(y_full * border_scale)
#     x_border = int(x_full * border_scale)
#     box = Box(attention_mask)
#     # x, y, dx, dy = cv2.boundingRect(non_zero_coords)
#     window_slice = (
#         slice(box.y - y_border, box.y + box.dy + y_border),
#         slice(box.x - x_border, box.x + box.dx + x_border),
#     )
#     window = np.zeros_like(image)
#     window[window_slice] = image[window_slice]
#     return window, box


# def get_F_metric(mask: np.ndarray, window: np.ndarray):
#     mask = mask.copy()
#     mask[mask != 0] = 1
#     window = window.copy()
#     window[window != 0] = 1

#     dy, dx = window.shape

#     tp = np.count_nonzero(mask)
#     fp = dx * dy
#     fn = np.count_nonzero(window - mask)
#     return 2 * tp / (2 * tp + fp + fn)

In [212]:
class Block(NamedTuple):
    """One text-block (paragraph) mask on the picture, with its label and size."""

    # Order number (label) of the mask
    type: int
    # Number of non-zero pixels
    power: int
    # Spatial mask of the block
    mask: np.ndarray


def get_blocks_map(image: np.ndarray, max_rectangles: int = 20) -> list[Block]:
    """
    Return separate text area masks found on an image.

    The image is blurred, edge-detected and rank-filtered, then dilated with a
    5x5 rectangular kernel until at most ``max_rectangles`` connected
    components (background excluded) remain. Every component becomes a
    ``Block`` whose mask holds the component's label at its pixels.

    :param image: Image to analyse for separate text blocks
    :param max_rectangles: Upper bound on the number of returned blocks, defaults to 20;
        must be in 1..255 because the labels are stored in ``uint8`` masks
    :return: List of text area masks sorted by ascending power (pixel count)
    :raises ValueError: If ``max_rectangles`` is outside 1..255
    """
    if not 1 <= max_rectangles <= 255:
        # 0 would never terminate on a non-empty image; >255 would overflow uint8 labels.
        raise ValueError(f"max_rectangles must be in 1..255, got {max_rectangles}")

    img = cv2.GaussianBlur(image, (5, 5), 5)
    img = cv2.Canny(img, 100, 200)
    img = rank_filter(img, rank=5, size=3)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Dilate at least once, then keep merging neighbours until few enough remain.
    while True:
        img = cv2.dilate(img, kernel, iterations=1)
        n_labels, block_map = cv2.connectedComponents(img)
        # connectedComponents counts the background (label 0) as well.
        n_rectangles = n_labels - 1
        if n_rectangles <= max_rectangles:
            break

    blocks = []
    # Foreground labels are 1..n_rectangles inclusive.
    for i in range(1, n_rectangles + 1):
        component = block_map == i
        blocks.append(Block(i, np.count_nonzero(component), np.uint8(component) * i))
    return sorted(blocks, key=lambda x: x.power)

In [15]:
# a = img_gray
# a = cv2.GaussianBlur(a, (5, 5), 5)
# a = cv2.Canny(a, 100, 200)
# a = rank_filter(a, rank=5, size=3)
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# n_rectangles = 1000
# while n_rectangles > 20:
#     a = cv2.dilate(a, kernel, iterations=1)
#     n_rectangles, block_map = cv2.connectedComponents(a)
#     n_rectangles -= 1

In [213]:
block_map = get_blocks_map(img_gray)
# Label image: sum of all block masks, each of which holds its own `type` at its pixels.
img_block = sum([i.mask for i in block_map])

text_clasters = []
while len(block_map):
    # Blocks are sorted by ascending power, so pop() takes the largest remaining one.
    central_block = block_map.pop()
    area = AttentionArea(central_block.mask, img_block)
    start_F1 = area.F1_metric

    # periphery_types runs np.unique over two full-size arrays; evaluate it once
    # instead of once per candidate block.
    periphery_types = area.periphery_types
    periphery_blocks = [block for block in block_map if block.type in periphery_types][::-1]

    for periphery in periphery_blocks:
        # Skip neighbours whose inclusion drops the score below 90% of the initial one.
        if AttentionArea(area.focus_mask + periphery.mask, img_block).F1_metric < 0.9 * start_F1:
            continue

        # NOTE(review): only focus_mask grows here (in place); the window, the boxes and
        # start_F1 still describe the initial central block. Confirm this is intended.
        area.focus_mask += periphery.mask
        block_map.remove(periphery)
    text_clasters.append(area.focus_mask)

In [218]:
# Show the second collected cluster mask (index 1).
plt.imshow(text_clasters[1])

<matplotlib.image.AxesImage at 0x166734068f0>

In [201]:
# NOTE(review): `a` is not assigned in any live cell visible in this notebook
# (only in commented-out code); this likely relies on stale kernel state. Confirm or remove.
plt.imshow(a.focus_mask)

<matplotlib.image.AxesImage at 0x16672f60f40>

In [17]:
# Earlier version of the clustering loop: grow each focused block with neighbouring
# blocks while the score stays at or above 90% of the current one.
# NOTE(review): this cell cannot run on a fresh kernel as visible here:
#   - `ranged_blocks` is not assigned in any visible cell;
#   - `watch` and `get_F_metric` only exist as commented-out code, and that
#     `get_F_metric` takes two arguments while it is called with three here;
#   - `block.i` is not a field of `Block` (its fields are type, power, mask).
# Later cells that read `paragraphs` depend on this cell having succeeded.
paragraphs =[]
while len(ranged_blocks):
    focused = ranged_blocks.pop().mask.copy()
    window_mask, size = watch(focused, img_block)
    start_F1 = get_F_metric(focused, window_mask, size)

    # Candidates: remaining blocks whose id occurs among the values of the window.
    watched_blocks = [block for block in ranged_blocks if block.i in set(np.unique(window_mask))]

    while len(watched_blocks):
        add_block = watched_blocks.pop()
        focused_new = focused + add_block.mask
        window_mask, size = watch(focused_new, img_block)
        F1 = get_F_metric(focused_new, window_mask, size)
        # Accept the merge and move the reference score to the new value.
        if F1 >= start_F1 * 0.9:
            start_F1 = F1
            focused = focused_new
            ranged_blocks.remove(add_block)
    paragraphs.append(focused)

NameError: name 'Box' is not defined

In [None]:
# NOTE(review): `focused_new` is only assigned inside the In [17] loop above, so this
# shows whatever candidate mask that loop built last. Requires that cell to have run.
plt.imshow(focused_new)

In [None]:
# Composite view: every paragraph mask is multiplied by its 1-based index, then all are summed.
plt.imshow(sum([paragraphs[i] * (i+1) for i in range(len(paragraphs))]))

In [None]:
# Show the second paragraph mask (index 1).
plt.imshow(paragraphs[1])

In [None]:
def get_min_area_rect(image):
    """
    Fit a minimum-area rectangle around the non-zero pixels of an image.

    :param image: Image whose non-zero pixels are enclosed
    :return: Result of ``cv2.minAreaRect`` for those pixel coordinates
    """
    return cv2.minAreaRect(cv2.findNonZero(image))


In [None]:
# Show the thresholded, downscaled page image.
plt.imshow(img_gray)

In [None]:
def _open_transform(image, kernel_size):
    """
    Apply a morphological opening with a square kernel.

    :param image: Image to filter
    :param kernel_size: Side length of the square (rectangular) structuring element
    :return: Opened image
    """
    square = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, square)


# TODO: find deriv from generator
def _hightlight_text(image):
    """
    Open the image with the kernel size that precedes the steepest loss of pixels.

    Non-zero pixels are counted after opening with square kernels of size 1..9.
    The step between consecutive sizes with the most negative change is located,
    and the image opened with the smaller size of that step is returned.

    :param image: Image to filter
    :return: Image opened with the selected kernel size
    """
    remaining = np.array(
        [np.count_nonzero(_open_transform(image, size)) for size in range(1, 10)]
    )
    change = np.diff(remaining)
    # Index k of `change` is the step from kernel size k + 1 to k + 2;
    # argmin picks the first occurrence of the minimum.
    best_kernel_size = np.argmin(change) + 1

    return _open_transform(image, best_kernel_size)


# `scale` is only referenced by the commented-out rotation code below.
scale = 1
# First paragraph mask.
# NOTE(review): `paragraphs` is filled by the In [17] cell, which fails as shown above.
img = paragraphs[0]

# non_zero_coords = cv2.findNonZero(_hightlight_text(img))
# box = cv2.minAreaRect(non_zero_coords)
# angle = box[2]

# h, w = gray_image.shape
# center = (w / 2, h / 2)
# # FIXME: check another rotations
# rotate_M = cv2.getRotationMatrix2D(center, 45 - np.abs(np.abs(angle) - 45), scale)
# aligned_image = cv2.warpAffine(
#     gray_image.copy(), rotate_M, (w, h), cv2.INTER_CUBIC, cv2.BORDER_REPLICATE
# )

# plt.imshow(aligned_image)

# x, y, dx, dy = cv2.boundingRect(cv2.findNonZero(img))
# plt.imshow(img[y:y+dy,x:x+dx])


In [None]:
# Grow the paragraph mask (five dilations with a 5x5 rectangle), then keep only
# the page pixels that fall under the grown mask.
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(5, 5))
mask2 = cv2.dilate(src=img, kernel=kernel, iterations=5)
img2 = cv2.bitwise_and(src1=img_gray, src2=img_gray, mask=mask2)
plt.imshow(img2)
# plt.imshow(mask2)

In [None]:
import easyocr

In [None]:
# EasyOCR reader for Russian, with GPU use switched off.
reader = easyocr.Reader(
    lang_list=["ru"],
    gpu=False,
)

In [None]:
# rotation_info lists the extra orientations EasyOCR tries for each text box.
# Its documentation names 90, 180 and 270 as the eligible values, so passing every
# integer in range(0, 270) only multiplied the recognition work.
reader.readtext(
    img2,
    width_ths=0.7,
    height_ths=0.7,
    rotation_info=[90, 180, 270],
)