In [None]:
from typing import NamedTuple

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Notebook-wide matplotlib defaults: figure size, subplot margins/spacing
# and an inverted grayscale colormap for images.
plt.rcParams.update(
    {
        "figure.figsize": (10, 7),
        "figure.subplot.left": 0.1,
        "figure.subplot.right": 0.99,
        "figure.subplot.top": 0.97,
        "figure.subplot.bottom": 0.05,
        "figure.subplot.hspace": 0.3,
    }
)
matplotlib.rc("image", cmap="gray_r")

In [None]:
# Select the Qt GUI backend so matplotlib figures open in separate interactive windows.
%matplotlib qt

In [None]:
# NOTE: machine-specific absolute path; point it at your own copy of the scan.
img_path = r"D:\WORKS\TechTasks\DocData\2.jpg"
img_real = cv2.imread(img_path)
# cv2.imread reports a missing/unreadable file by returning None instead of raising,
# so fail here with a clear message rather than on the `.shape` access below.
if img_real is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
y, x, _ = img_real.shape
# OpenCV loads colour images in BGR channel order while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img_real, cv2.COLOR_BGR2RGB))

# Prepare

In [None]:
def show(image: np.ndarray):
    """
    Display an image in the OpenCV window named 'contours' and wait for a key press

    The image is resized to a height of 960 pixels; the width is scaled by the
    same factor (truncated to an integer).

    :param image: Image to display
    """
    window_name = 'contours'
    target_height = 960

    height, width = image.shape[:2]
    target_width = int(width * target_height / height)

    resized = cv2.resize(image, (target_width, target_height))
    cv2.imshow(window_name, resized)
    cv2.resizeWindow(window_name, target_width, target_height)

    # Blocks until any key is pressed.
    cv2.waitKey()


def downscale_image(image: np.ndarray, max_size=2048) -> np.ndarray:
    """
    Downscale image so that its longest side does not exceed ``max_size``

    :param image: Input image (the first two axes are treated as height and width)
    :param max_size: Maximum allowed length of the longest side in pixels, defaults to 2048
    :return: The input image itself when it already fits, otherwise a resized
        copy with the same aspect ratio
    """

    # Only the two spatial axes take part in the comparison; a trailing
    # channel axis must not influence the scale factor.
    longest_side = max(image.shape[:2])
    if longest_side <= max_size:
        return image

    scale = max_size / longest_side
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)


def make_square(image):
    """
    Pad an image with black borders until its width equals its height

    The missing rows/columns are split between the two opposite sides; when the
    difference is odd the extra pixel goes to the bottom/right side.

    :param image: Input image
    :return: Padded image produced by ``cv2.copyMakeBorder``
    """
    height, width = image.shape[:2]
    side = height if height >= width else width

    pad_y, pad_x = side - height, side - width
    top, left = pad_y // 2, pad_x // 2

    return cv2.copyMakeBorder(
        image,
        top,
        pad_y - top,
        left,
        pad_x - left,
        cv2.BORDER_CONSTANT,
        value=[0, 0, 0],
    )


def align(image):
    """
    Deskew an image using the angle of the minimum-area rectangle around its edges

    Edges are detected with Canny and thickened by dilation; the rotated
    bounding rectangle of all edge pixels supplies the rotation angle.

    :param image: Input image (passed to ``cv2.Canny``, so presumably 8-bit — TODO confirm)
    :return: The input image itself when no edges are found or the detected
        angle is within 5 degrees of 0 / 90 / -90, otherwise a rotated copy of
        the same size
    """
    tolerance = 5  # degrees of skew that are not worth correcting

    image_processed = cv2.Canny(image, 100, 200)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    image_processed = cv2.dilate(image_processed, kernel, iterations=2)
    # Debug preview of the edge mask used for the angle estimate.
    plt.imshow(image_processed)

    non_zero_coordinates = cv2.findNonZero(image_processed)
    # findNonZero yields None for an image without edge pixels; nothing to align then.
    if non_zero_coordinates is None:
        return image

    # minAreaRect returns ((cx, cy), (w, h), angle); only the angle is needed.
    angle = cv2.minAreaRect(non_zero_coordinates)[2]
    if (
        (-tolerance < angle < tolerance)
        or (90 - tolerance < angle < 90 + tolerance)
        or (angle < -90 + tolerance)
    ):
        return image

    # Slice the shape so that images with a channel axis are accepted as well.
    height, width = image.shape[:2]
    rotate_M = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1)
    # flags/borderMode must be passed by keyword: the 4th positional parameter
    # of warpAffine is the optional output array `dst`, not the interpolation flag.
    return cv2.warpAffine(
        image,
        rotate_M,
        (int(width), int(height)),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

In [None]:
# Binarize the page: grayscale -> inverted Otsu threshold (values above the
# automatically chosen threshold become 0, all others 255).
img_gray = cv2.cvtColor(img_real, cv2.COLOR_BGR2GRAY)
img_gray = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
img_gray = downscale_image(img_gray)
# Surround the page with a constant black frame of `border_add` pixels on every side.
border_add = 10
img_gray = cv2.copyMakeBorder(
    img_gray,
    border_add,
    border_add,
    border_add,
    border_add,
    cv2.BORDER_CONSTANT,
    value=[0, 0, 0],
)
# Pad the shorter dimension with black so the working image is square.
img_gray = make_square(img_gray)


# Deskewing is currently disabled; uncomment the call below to rotate the page with align().
# img_gray = align(img_gray)
plt.imshow(img_gray)

# Clustering

In [None]:
class BoxProperties(NamedTuple):
    """Axis-aligned box described by two corners: (x0, y0) and (x1, y1)"""

    # First corner of the box.
    x0: int
    y0: int
    # Opposite corner of the box (AttentionArea fills these with x + width and
    # y + height, or with values clipped to the image size).
    x1: int
    y1: int


def get_mask_map(image: np.ndarray) -> tuple[dict[int, np.ndarray], list[int]]:
    """
    Split an image into connected components and build one mask per component

    The image is blurred and dilated before labelling, so that nearby strokes
    merge into a single component.

    :param image: Image to analyse separate text blocks
    :return: Tuple ``(types_map, type_list)``. ``types_map`` maps every
        foreground component label to a mask that holds the label value on the
        component's pixels and 0 elsewhere. ``type_list`` contains the same
        labels ordered from the largest component (by pixel count) to the smallest
    """
    img = cv2.GaussianBlur(image, (9, 9), 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    img = cv2.dilate(img, kernel, iterations=1)

    # connectedComponents returns the total number of labels INCLUDING the
    # background label 0, so the foreground labels are 1 .. n_labels - 1.
    n_labels, segmented_img = cv2.connectedComponents(img)
    foreground_labels = range(1, n_labels)

    # NOTE(review): masks are stored as uint8, so label values above 255 cannot
    # be represented — TODO confirm the component count stays below 256.
    types_map = {i: np.uint8(segmented_img == i) * i for i in foreground_labels}

    type_list = sorted(
        foreground_labels,
        key=lambda i: np.count_nonzero(types_map[i]),
        reverse=True,
    )
    return types_map, type_list


class AttentionArea:
    """Focus mask plus a slightly larger window around it, used to look for neighbouring blocks"""

    # Border thickness as a fraction of the image size
    border_scale = 0.02

    def __init__(self, attention_mask: np.ndarray, image: np.ndarray) -> None:
        """
        Build the focus box, the enlarged window box and the windowed copy of the image

        :param attention_mask: Mask whose non-zero pixels form the focus
        :param image: Full 2-D image the window is cut from
        """
        left, top, width, height = cv2.boundingRect(cv2.findNonZero(attention_mask))
        right, bottom = left + width, top + height
        self.focus_box = BoxProperties(left, top, right, bottom)

        # Grow the focus box by a margin proportional to the image size,
        # clipped to the image bounds.
        image_height, image_width = image.shape
        margin_y = int(image_height * self.border_scale)
        margin_x = int(image_width * self.border_scale)
        self.window_box = BoxProperties(
            max(left - margin_x, 0),
            max(top - margin_y, 0),
            min(right + margin_x, image_width),
            min(bottom + margin_y, image_height),
        )

        self.focus = attention_mask.copy()

        # The window keeps the image content inside the window box and is zero elsewhere.
        rows, cols = self._window_slices()
        self.window = np.zeros_like(image)
        self.window[rows, cols] = image[rows, cols]

    def _window_slices(self):
        """Row and column slices that select the window box from a full-size array"""
        box = self.window_box
        return slice(box.y0, box.y1), slice(box.x0, box.x1)

    @property
    def F1_metric(self) -> float:
        """F1-style score ``2*tp / (2*tp + fp + fn)`` computed inside the window box"""
        rows, cols = self._window_slices()

        focus_on = self.focus[rows, cols] != 0
        window_on = self.window[rows, cols] != 0

        # tp: focus pixels; fp: the whole window area;
        # fn: pixels where the binarized window and focus disagree.
        true_pos = np.count_nonzero(focus_on)
        false_pos = focus_on.size
        false_neg = np.count_nonzero(window_on != focus_on)
        return 2 * true_pos / (2 * true_pos + false_pos + false_neg)

    def get_periphery_types(self):
        """Values present in the window but absent from the focus, sorted by ascending pixel count"""

        def pixel_count(label):
            in_focus = np.count_nonzero(self.focus[self.focus == label])
            in_window = np.count_nonzero(self.window[self.window == label])
            return in_focus + in_window

        window_labels = set(np.unique(self.window))
        focus_labels = set(np.unique(self.focus))
        return sorted(window_labels - focus_labels, key=pixel_count)

In [None]:
# Edge map of the binarized page, then one mask per connected component.
img_processed = cv2.GaussianBlur(img_gray, (3, 3), 5)
img_processed = cv2.Canny(img_processed, 100, 200)
mask_map, mask_types = get_mask_map(img_processed)
# Sum of all component masks; each mask carries its own label value, so this is
# presumably a single label image (assumes the masks do not overlap).
typed_mask = sum(mask_map.values())

# Greedy region growing: take the next label from mask_types as a seed and try
# to absorb the components found in the window around it.
clasters = []
while len(mask_types):
    central_type = mask_types.pop(0)
    area = AttentionArea(mask_map[central_type], typed_mask)

    periphery_types = area.get_periphery_types()

    while len(periphery_types):
        periphery_type = periphery_types.pop(0)
        # Candidate area: current focus merged with the peripheral component.
        new_area = AttentionArea(area.focus + mask_map[periphery_type], typed_mask)

        # Debug visualisation: focus, current candidate and the remaining
        # periphery get different intensities, plus the window/focus rectangles.
        add_img = sum([mask_map[i] for i in area.get_periphery_types()])
        show_img = cv2.bitwise_not(
            np.sign(area.focus) * 200
            + np.sign(mask_map[periphery_type]) * 50
            + np.sign(add_img) * 15
        )
        cv2.rectangle(
            show_img,
            (area.window_box.x0, area.window_box.y0),
            (area.window_box.x1, area.window_box.y1),
            200,
            2,
        )
        cv2.rectangle(
            show_img,
            (area.focus_box.x0, area.focus_box.y0),
            (area.focus_box.x1, area.focus_box.y1),
            200,
            2,
        )
        # show() waits for a key press, so every candidate pauses the loop.
        show(show_img)

        # Reject the merge when the score drops below 90% of the current one or
        # when the component is no longer available in mask_types.
        if (new_area.F1_metric < area.F1_metric * 0.9) or (periphery_type not in mask_types):
            continue

        # Accept the merge: continue from the enlarged area with a fresh periphery list.
        area = new_area
        periphery_types = area.get_periphery_types()
        mask_types.remove(periphery_type)

    # Store the grown focus as a mask filled with the 1-based cluster index,
    # thickened by five 3x3 dilation passes.
    claster = np.sign(area.focus) * (len(clasters) + 1)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    claster = cv2.dilate(claster, kernel, iterations=5)
    clasters.append(claster)

cv2.destroyAllWindows()

# Combined preview of all cluster masks.
plt.imshow(sum(clasters))

In [None]:
# Re-draw the combined cluster map (sum of all cluster masks).
plt.imshow(sum(clasters))

# Recognize

In [None]:
# from pyaspeller import YandexSpeller
# import pytesseract

In [None]:
# speller = YandexSpeller()

In [None]:
# Preview the mask of the first cluster.
plt.imshow(clasters[0])

In [None]:
# Preview the binarized page restricted to the cluster at index 2
# (raises IndexError when fewer than three clusters exist).
plt.imshow(cv2.bitwise_and(img_gray, img_gray, mask=clasters[2]))

In [None]:
class RecognizeResult(NamedTuple):
    """Recognition result for one cluster: text, the angle it was accepted at and its bounding box"""

    # Recognised text of the cluster.
    text: str
    # Angle (taken from the list of tried angles) at which the text was accepted.
    angle: int
    # Bounding rectangle of the cluster mask as returned by cv2.boundingRect:
    # top-left corner (x, y) followed by width dx and height dy.
    x: int
    y: int
    dx: int
    dy: int


res = []
n = 0  # 1-based cluster counter; stays 0 when there are no clusters
for n, img_mask in enumerate(clasters, start=1):
    # Page pixels that belong to this cluster, and the cluster's bounding rectangle.
    img_cropped = cv2.bitwise_and(img_gray, img_gray, mask=img_mask)
    non_zero_coords = cv2.findNonZero(img_mask)
    box_cordinates = cv2.boundingRect(non_zero_coords)

    # plt.imshow(img_cropped)

    # OCR is stubbed out: the "text" is the cluster number and only the
    # 180-degree attempt counts as correct.
    for angle in (0, -90, 90, 180):
        # text = pytesseract.image_to_string(img_cropped, lang='rus+eng', config='--psm 3')
        # text = speller.spelled(text)
        text = f'{n}\n'
        is_correct = angle == 180
        if not (len(text) and is_correct):
            continue

        res.append(RecognizeResult(text, angle, *box_cordinates))
        break

In [None]:
# Most frequent recognition angle across all results (raises ValueError when res is empty).
angles = [i.angle for i in res]
general_angle = max(set(angles), key=angles.count)
# Weight applied to one of the two axes in the sort key below.
slope = 2
# Sort key for a RecognizeResult, selected by the dominant angle; each variant
# combines a box coordinate along x with one along y, one of them scaled by `slope`.
# NOTE(review): presumably this approximates reading order for each page orientation — confirm.
metric = {
    0: lambda f: f. x + slope * f.y,
    -90: lambda f: slope * f.x - (f.y + f.dy),
    90: lambda f: -slope * (f.x + f.dx) + f.y,
    180: lambda f: -(f.x + f.dx) - slope * (f.y + f.dy),
}[general_angle]

In [None]:
# Print the recognised texts ordered by the selected metric, joined with single spaces.
ordered_texts = (item.text for item in sorted(res, key=metric))
print(' '.join(ordered_texts))