In [None]:
from typing import NamedTuple

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import rank_filter

# Notebook-wide Matplotlib defaults: figure size and subplot margins in one place
plt.rcParams.update(
    {
        "figure.figsize": (10, 7),
        "figure.subplot.left": 0.1,
        "figure.subplot.right": 0.99,
        "figure.subplot.top": 0.97,
        "figure.subplot.bottom": 0.05,
        "figure.subplot.hspace": 0.3,
    }
)
# Default colormap for images drawn with imshow
matplotlib.rc("image", cmap="gray_r")

In [None]:
# Select the Qt GUI backend for Matplotlib (instead of inline rendering)
%matplotlib qt

In [130]:
# NOTE(review): absolute, machine-specific path — point it at your own copy of the scan
IMAGE_PATH = r"D:\WORKS\TechTasks\DocData\1.jpg"

# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of a cryptic error in a later cell
img_real = cv2.imread(IMAGE_PATH)
if img_real is None:
    raise FileNotFoundError(f"Cannot read image: {IMAGE_PATH}")

# OpenCV loads colour images as BGR while Matplotlib expects RGB: convert for display only.
# img_real itself stays BGR because the preprocessing cell converts it with COLOR_BGR2GRAY.
plt.imshow(cv2.cvtColor(img_real, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x190000b4be0>

# Prepare

In [None]:
def rescale_image(image: np.ndarray, max_size=1080)-> np.ndarray:
    """
    Rescale image so that its longest side becomes ``max_size`` pixels

    The aspect ratio is kept. Images larger than ``max_size`` are shrunk,
    smaller ones are enlarged.

    :param image: Input image, 2-D (grayscale) or 3-D (height, width, channels)
    :param max_size: Target length of the longest side, defaults to 1080
    :raises ValueError: If the image has a zero-sized side or ``max_size`` is not positive
    :return: Rescaled image
    """
    # Only the two spatial axes take part, so the channel axis can never be
    # mistaken for the longest side
    height, width = image.shape[:2]
    if height == 0 or width == 0:
        raise ValueError(f"Cannot rescale an empty image of shape {image.shape}")
    if max_size <= 0:
        raise ValueError(f"max_size must be positive, got {max_size}")

    scale = max_size / max(height, width)
    # NOTE(review): OpenCV's documentation recommends INTER_AREA when shrinking;
    # INTER_CUBIC is kept so that existing results stay unchanged
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

In [150]:
# Binarise at full resolution first, then bring the result to the working size
gray_full = cv2.cvtColor(img_real, cv2.COLOR_BGR2GRAY)
# Otsu picks the threshold automatically; BINARY_INV maps pixels at or below it to 255
binary_full = cv2.threshold(gray_full, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# Morphological opening with a 3x3 rectangle, applied once
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opened_full = cv2.morphologyEx(binary_full, cv2.MORPH_OPEN, kernel, iterations=1)
img_gray = rescale_image(opened_full)

plt.imshow(img_gray)

<matplotlib.image.AxesImage at 0x19065d94c40>

In [None]:
# NumPy shape order is (rows, columns): height first, then width
y,x = img_gray.shape

In [None]:
max_size = 1920
max_dimention = max(y, x)
scale = max_size / max_dimention

# dsize is (width, height) and must hold whole pixels; it is passed by keyword only,
# because a positional None next to dsize= binds the same argument twice
a = cv2.resize(
    img_gray,
    dsize=(round(x * scale), round(y * scale)),
    interpolation=cv2.INTER_CUBIC,
)

# Clustering

In [None]:
class BoxProperties(NamedTuple):
    """
    Axis-aligned box described by two corners: (x0, y0) and (x1, y1)

    AttentionArea uses ``y0:y1`` and ``x0:x1`` as slice bounds into the image.
    """

    x0: int
    y0: int
    x1: int
    y1: int


class AttentionArea:
    """
    A block mask ("focus") together with its padded surroundings ("window")

    The window is the bounding box of the focus grown by a border on every side.
    Labels that appear inside the window but not in the focus are candidates
    for merging with the focus.
    """

    # Border thickness as a fraction of the image size (applied per axis)
    border_scale = 0.05

    def __init__(self, attention_mask: np.ndarray, image: np.ndarray) -> None:
        """
        Create attention area around attention_mask

        :param attention_mask: Mask to look around, non-zero inside the block;
            expected to have the same shape as ``image``
        :param image: Full 2-D label image
        :raises ValueError: If ``attention_mask`` has no non-zero pixel
        """
        non_zero_coords = cv2.findNonZero(attention_mask)
        if non_zero_coords is None:
            raise ValueError("attention_mask has no non-zero pixels")

        # boundingRect returns (x, y, width, height)
        x, y, dx, dy = cv2.boundingRect(non_zero_coords)
        self.focus_box = BoxProperties(x, y, x + dx + 1, y + dy + 1)

        y_full, x_full = image.shape
        y_border = int(y_full * self.border_scale)
        x_border = int(x_full * self.border_scale)
        # The window is focus_box grown by ONE border on each side and clipped
        # to the image (previously the right/bottom side got two borders)
        self.window_box = BoxProperties(
            max(x - x_border, 0),
            max(y - y_border, 0),
            min(x + dx + 1 + x_border, x_full),
            min(y + dy + 1 + y_border, y_full),
        )

        self.focus = attention_mask.copy()
        # Same size as the image, but only the window region is filled in
        self.window = np.zeros_like(image)
        self.window[self._window_slice] = image[self._window_slice]

    @property
    def _window_slice(self) -> tuple[slice, slice]:
        """(rows, columns) slices selecting the window region"""
        box = self.window_box
        return slice(box.y0, box.y1), slice(box.x0, box.x1)

    @property
    def F1_metric(self) -> float:
        """
        F1-like score used to judge the clustering quality

        Evaluated inside the window only:

        * ``tp`` - number of focus pixels
        * ``fp`` - total number of pixels of the window
        * ``fn`` - window pixels that belong to some block but not to the focus

        NOTE(review): ``fp`` is the whole window area, so this is a heuristic
        rather than the textbook F1 score.
        """
        window_slice = self._window_slice
        # Boolean masks instead of uint8 subtraction, which could wrap around
        focus_mask = self.focus[window_slice] != 0
        window_mask = self.window[window_slice] != 0

        tp = np.count_nonzero(focus_mask)
        fp = focus_mask.size
        fn = np.count_nonzero(window_mask & ~focus_mask)
        return 2 * tp / (2 * tp + fp + fn)

    @property
    def periphery_types(self) -> list[int]:
        """
        Labels present in the window but absent from the focus

        :return: Labels as plain ints, ordered by the number of their pixels
            inside the window, largest first; background (0) is never included
        """
        labels, counts = np.unique(self.window, return_counts=True)
        focus_labels = set(np.unique(self.focus).tolist())
        candidates = [
            (int(label), int(count))
            for label, count in zip(labels, counts)
            if int(label) != 0 and int(label) not in focus_labels
        ]
        # Sizes are measured in the window: these labels do not occur in the focus,
        # so counting them there would always give 0 and leave the order arbitrary
        candidates.sort(key=lambda item: item[1], reverse=True)
        return [label for label, _ in candidates]


def get_blocks_map(
    image: np.ndarray, max_rectangles: int = 20
) -> tuple[dict[int, np.ndarray], list[int]]:
    """
    Split image into separate text blocks

    Edges are detected and then dilated step by step until at most
    ``max_rectangles`` connected components remain.

    :param image: Image to analyse separate text blocks
    :param max_rectangles: Upper bound for the number of blocks (1..255, because
        the labels are stored in uint8 masks), defaults to 20
    :raises ValueError: If ``max_rectangles`` is outside 1..255
    :return: Mapping ``label -> mask`` (the mask holds ``label`` inside the block
        and 0 elsewhere) and the list of labels ordered from the largest block
        to the smallest
    """
    if not 1 <= max_rectangles <= 255:
        raise ValueError(f"max_rectangles must be in 1..255, got {max_rectangles}")

    img = cv2.GaussianBlur(image, (3, 3), 5)
    img = cv2.Canny(img, 100, 200)
    # Rank filter over a 3x3 neighbourhood (rank index 5 of the 9 sorted values)
    img = rank_filter(img, rank=5, size=3)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Grow the edges until neighbouring fragments merge into few enough blocks;
    # at least one dilation is always applied
    while True:
        img = cv2.dilate(img, kernel, iterations=1)
        n_labels, segmented_img = cv2.connectedComponents(img)
        # connectedComponents counts the background (label 0) as well
        n_rectangles = n_labels - 1
        if n_rectangles <= max_rectangles:
            break

    block_map = {i: np.uint8(segmented_img == i) * i for i in range(1, n_rectangles + 1)}

    # Measure every block once instead of on each comparison
    block_sizes = {i: np.count_nonzero(mask) for i, mask in block_map.items()}
    block_types = sorted(block_map, key=block_sizes.__getitem__, reverse=True)
    return block_map, block_types

In [None]:
# # img = cv2.GaussianBlur(img_gray, (3, 3), 5)
# img = cv2.Canny(img, 100, 200)
# img = rank_filter(img, rank=5, size=3)
# plt.imshow(img)

In [151]:
# Preview of the 9x9 Gaussian smoothing (sigma 2) that the next cell applies before labelling
plt.imshow(cv2.GaussianBlur(img_gray, (9, 9), 2))

<matplotlib.image.AxesImage at 0x1906069d6f0>

In [152]:

# Smooth the binarised page, then dilate once before labelling
img = cv2.GaussianBlur(img_gray, (9, 9), 2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
# NOTE(review): open_image is not used by any visible cell; kept in case other experiments read it
open_image = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel, iterations=1)
# plt.imshow(img)
img = cv2.dilate(img, kernel, iterations=1)

# connectedComponents counts the background (label 0) as well, so the real blocks
# are 1 .. n_labels - 1; iterating up to n_labels would add an empty phantom block
n_labels, segmented_img = cv2.connectedComponents(img)
n_rectangles = n_labels - 1

block_map = {i: np.uint8(segmented_img == i) * i for i in range(1, n_rectangles + 1)}

# Labels ordered from the largest block to the smallest
not_used_block_types = sorted(
    block_map,
    key=lambda i: np.count_nonzero(block_map[i]),
    reverse=True,
)
# Explicit start array so the result is still an image when no block was found
img_block = sum(block_map.values(), np.zeros(segmented_img.shape, dtype=np.uint8))
plt.imshow(img_block)

<matplotlib.image.AxesImage at 0x19060623880>

In [153]:
# block_map, not_used_block_types = get_blocks_map(img_gray)
# img_block = sum(block_map.values())

# Set to True to inspect every merge candidate in an OpenCV window
# (execution then blocks until a key is pressed for each candidate)
SHOW_MERGE_PREVIEW = False
# A neighbour is merged when the F1 metric keeps at least this share of its current value
MIN_F1_RATIO = 0.9

# Work on a copy so that re-running this cell starts from the full list again
remaining_block_types = list(not_used_block_types)

clasters = []
while remaining_block_types:
    # Blocks are ordered by size, so each cluster is seeded by the largest unused block
    central_i = remaining_block_types.pop(0)
    if not np.any(block_map[central_i]):
        # An empty mask has no bounding box to build an attention area around
        continue
    area = AttentionArea(block_map[central_i], img_block)

    claster_mask = area.focus.copy()
    F1_metric = area.F1_metric

    # Candidates come from the surroundings of the seed block only and are fixed
    # up front, so removing merged blocks below does not disturb the iteration
    candidates = [i for i in area.periphery_types if i in remaining_block_types]
    for periphery_type in candidates:
        if SHOW_MERGE_PREVIEW:
            cv2.imshow(
                'mask',
                cv2.bitwise_not(np.sign(claster_mask) * 100 + np.sign(block_map[periphery_type]) * 20),
            )
            cv2.waitKey(0)

        new_area = AttentionArea(claster_mask + block_map[periphery_type], img_block)
        # The property recomputes the metric on every access: evaluate it once
        new_F1_metric = new_area.F1_metric
        if new_F1_metric < F1_metric * MIN_F1_RATIO:
            continue
        F1_metric = new_F1_metric
        claster_mask += block_map[periphery_type]
        remaining_block_types.remove(periphery_type)
    # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # claster_mask = cv2.dilate(claster_mask, kernel, iterations=5)

    # Every cluster is stored as a mask holding its own 1-based index
    clasters.append(np.sign(claster_mask) * (len(clasters) + 1))

if SHOW_MERGE_PREVIEW:
    cv2.destroyAllWindows()
# plt.imshow(img_block)

In [169]:
# Each cluster mask holds its own 1-based index, so the sum shows all clusters in one label image
plt.imshow(sum(clasters))

<matplotlib.image.AxesImage at 0x19060418850>

In [168]:
# Keep only the pixels of the first cluster: bitwise_and copies img_gray where mask is non-zero
mask = clasters[0]
# plt.imshow(mask)
img_cropped = cv2.bitwise_and(img_gray, img_gray, mask=mask)
plt.imshow(img_cropped)

<matplotlib.image.AxesImage at 0x1906049cac0>

# Recognize

In [None]:
def _open_transform(image, kernel_size):
    """
    Apply a morphological opening with a square kernel

    :param image: Image to filter
    :param kernel_size: Side length of the square structuring element
    :return: Opened image
    """
    square = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size,) * 2)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, square)


# TODO: find deriv from generator
def _hightlight_text(image):
    """
    Open the image with the kernel size that precedes the steepest loss of foreground

    Kernel sizes 1..9 are tried, the number of non-zero pixels left after each
    opening is recorded, and the size right before the largest drop is used.

    :param image: Image to filter
    :return: Image opened with the selected kernel size
    """
    kernel_sizes = range(1, 10)
    remaining_pixels = [
        np.count_nonzero(_open_transform(image, size)) for size in kernel_sizes
    ]
    # drops[j] is the change between kernel sizes j + 1 and j + 2;
    # argmin returns the first position of the most negative change
    drops = np.diff(np.array(remaining_pixels))
    best_kernel_size = np.argmin(drops) + 1

    return _open_transform(image, best_kernel_size)


scale = 1
# NOTE(review): `paragraphs` is not defined in any visible cell, so this line raises
# NameError on a fresh kernel — confirm which variable it should read from
img = paragraphs[0]

# non_zero_coords = cv2.findNonZero(_hightlight_text(img))
# box = cv2.minAreaRect(non_zero_coords)
# angle = box[2]

# h, w = gray_image.shape
# center = (w / 2, h / 2)
# # FIXME: check another rotations
# rotate_M = cv2.getRotationMatrix2D(center, 45 - np.abs(np.abs(angle) - 45), scale)
# aligned_image = cv2.warpAffine(
#     gray_image.copy(), rotate_M, (w, h), cv2.INTER_CUBIC, cv2.BORDER_REPLICATE
# )

# plt.imshow(aligned_image)

# x, y, dx, dy = cv2.boundingRect(cv2.findNonZero(img))
# plt.imshow(img[y:y+dy,x:x+dx])


In [None]:

# NOTE(review): `img2` is not defined in any visible cell — this fails on a fresh kernel;
# confirm which image should be shown here
plt.imshow(img2)
# plt.imshow(mask2)

In [None]:
import easyocr

In [None]:
# OCR reader for the 'ru' language code with GPU use switched off
reader = easyocr.Reader(lang_list=['ru'],gpu=False)

In [None]:
# rotation_info makes EasyOCR additionally try every text box rotated by the listed
# angles and keep the most confident reading. The documented values are 90, 180 and 270;
# list(range(0, 270)) asked for 270 separate rotations (0..269) and still missed 270.
# NOTE(review): `img2` is not defined in any visible cell — confirm the intended input
reader.readtext(img2, width_ths=0.7, height_ths=0.7, rotation_info=[90, 180, 270])