In [1]:
from typing import NamedTuple

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import rank_filter

plt.rcParams["figure.figsize"] = (10, 7)
plt.rcParams['figure.subplot.left'] = 0.1
plt.rcParams['figure.subplot.right'] = 0.99
plt.rcParams['figure.subplot.top'] = 0.97
plt.rcParams['figure.subplot.bottom'] = 0.05
plt.rcParams['figure.subplot.hspace'] = 0.3
matplotlib.rc("image", cmap="gray_r")

In [2]:
%matplotlib qt

In [47]:
img_real = cv2.imread(r"D:\WORKS\TechTasks\DocData\3.jpg")
y, x, _ = img_real.shape
# rotate_M = cv2.getRotationMatrix2D((x / 2, y / 2), 10, 1)
# img_real = cv2.warpAffine(img_real, rotate_M, (x, y), cv2.INTER_CUBIC, cv2.BORDER_REPLICATE)

plt.imshow(img_real)

<matplotlib.image.AxesImage at 0x21822f2f610>

# Prepare

In [4]:
def rescale_image(image: np.ndarray, max_size=1080)-> np.ndarray:
    """
    Downscale image

    :param image: Input image
    :param max_size: Maxi size, defaults to 2048
    :return: Downscaled image
    """

    if max_size <= max_size:
        return image

    scale = max_size / max(image.shape)
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

def make_square(img):
    border_add = 10
    y, x = img.shape[:2]
    max_side = max(y, x)

    dy = max_side - y
    dx = max_side - x

    top = dy // 2 + border_add
    bottom = dy - top + border_add * 2
    left = dx // 2 + border_add
    right = dx - left + border_add * 2
    return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

In [51]:
img_gray = cv2.cvtColor(img_real, cv2.COLOR_BGR2GRAY)
img_gray = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
img_gray = rescale_image(img_gray)
img_gray= make_square(img_gray)
plt.imshow(img_gray)

<matplotlib.image.AxesImage at 0x2182085f730>

In [66]:
img_processed = cv2.GaussianBlur(img_gray, (11, 11), 5)
# img_processed = cv2.Canny(img_processed, 100, 200)
# img_processed = rank_filter(img_processed, rank=5, size=3)
plt.imshow(img_processed)

<matplotlib.image.AxesImage at 0x21822b7c8e0>

In [67]:
contours0, hierarchy = cv2.findContours( img_processed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours0:
    rect = cv2.minAreaRect(cnt)
    box = cv2.boxPoints(rect)
    box = np.intp(box)
    cv2.drawContours(img_processed,[box],0,(255,0,0),2)

    cv2.imshow('contours', cv2.resize(img_processed,(760,760)))
    cv2.resizeWindow('contours', 760, 760)
    cv2.waitKey()

cv2.destroyAllWindows()

In [65]:
hierarchy

array([[[ 1, -1, -1, -1],
        [ 2,  0, -1, -1],
        [ 3,  1, -1, -1],
        [ 4,  2, -1, -1],
        [ 5,  3, -1, -1],
        [ 6,  4, -1, -1],
        [ 7,  5, -1, -1],
        [ 8,  6, -1, -1],
        [ 9,  7, -1, -1],
        [10,  8, -1, -1],
        [11,  9, -1, -1],
        [12, 10, -1, -1],
        [13, 11, -1, -1],
        [14, 12, -1, -1],
        [15, 13, -1, -1],
        [16, 14, -1, -1],
        [17, 15, -1, -1],
        [18, 16, -1, -1],
        [19, 17, -1, -1],
        [20, 18, -1, -1],
        [21, 19, -1, -1],
        [22, 20, -1, -1],
        [23, 21, -1, -1],
        [24, 22, -1, -1],
        [25, 23, -1, -1],
        [26, 24, -1, -1],
        [27, 25, -1, -1],
        [28, 26, -1, -1],
        [29, 27, -1, -1],
        [30, 28, -1, -1],
        [31, 29, -1, -1],
        [32, 30, -1, -1],
        [33, 31, -1, -1],
        [34, 32, -1, -1],
        [35, 33, -1, -1],
        [36, 34, -1, -1],
        [37, 35, -1, -1],
        [38, 36, -1, -1],
        [39,

In [81]:
def _open_transform(image, kernel_size):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    filtered_image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    return filtered_image


def align(image):
    quality = np.array([np.count_nonzero(_open_transform(image, i)) for i in range(1, 10)])
    d_quality = np.diff(quality)
    best_kernel_size = np.where(d_quality == min(d_quality))[0][0] + 1
    opened_image = _open_transform(image, best_kernel_size)

    non_zero_coordinates = cv2.findNonZero(opened_image)
    box = cv2.minAreaRect(non_zero_coordinates)
    box_x, box_y =box[1]
    angle = box[2]

    if angle < -45:
        angle += 90
    if (angle >45) and box_x>box_y:
        angle -=90

    y, x = image.shape
    rotate_M = cv2.getRotationMatrix2D((x / 2, y / 2), angle, 1)

    boxPts = cv2.boxPoints(box)
    imageCopy = image.copy()

    return cv2.warpAffine(imageCopy, rotate_M, (x, y), cv2.INTER_CUBIC, cv2.BORDER_REPLICATE)

In [82]:
img_gray =align(img_gray)
plt.imshow(img_gray)

<matplotlib.image.AxesImage at 0x16c688a5660>

# Clasterization

In [16]:
class BoxProperties(NamedTuple):
    """Box parameters: x,y x1,y1"""

    x0: int
    y0: int
    x1: int
    y1: int


def get_mask_map(image: np.ndarray) -> tuple[dict[int, np.ndarray], list[int]]:
    """
    Return separate text area masks on image

    :param image: Image to analyse separate text blocks
    :return: List of text area masks in the order of their power
    """
    img = cv2.GaussianBlur(image, (9, 9), 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    img = cv2.dilate(img, kernel, iterations=1)
    n_rectangles, segmented_img = cv2.connectedComponents(img)

    types_map = {i: np.uint8(segmented_img == i) * i for i in range(1, n_rectangles + 1)}

    type_list = sorted(
        list(range(1, n_rectangles + 1)),
        key=lambda i: np.count_nonzero(types_map[i]),
        reverse=True,
    )
    return types_map, type_list


class AttentionArea:
    """Find another blocks around attention area"""

    border_scale = 0.02
    """Border thickness proporitonal image size"""

    def __init__(self, attention_mask: np.ndarray, image: np.ndarray) -> None:
        """
        Create attention area around attention_mask

        :param attention_mask: Used mask to look around
        :param image: Full image
        """

        non_zero_coords = cv2.findNonZero(attention_mask)
        x, y, dx, dy = cv2.boundingRect(non_zero_coords)
        self.focus_box = BoxProperties(x, y, x + dx, y + dy)

        y_full, x_full = image.shape
        y_border = int(y_full * self.border_scale)
        x_border = int(x_full * self.border_scale)
        self.window_box = BoxProperties(
            max(x - x_border, 0),
            max(y - y_border, 0),
            min(x + dx + x_border, x_full),
            min(y + dy + y_border, y_full),
        )

        self.focus = attention_mask.copy()
        window_slice = (
            slice(self.window_box.y0, self.window_box.y1),
            slice(self.window_box.x0, self.window_box.x1),
        )
        self.window = np.zeros_like(image)
        self.window[window_slice] = image[window_slice].copy()

    @property
    def F1_metric(self) -> float:
        """F1 metric for a clasterisation quality"""
        window_slice = (
            slice(self.window_box.y0, self.window_box.y1),
            slice(self.window_box.x0, self.window_box.x1),
        )

        focus = self.focus[window_slice].copy()
        focus[focus != 0] = 1

        window = self.window[window_slice].copy()
        window[window != 0] = 1

        dy, dx = focus.shape

        tp = np.count_nonzero(focus)
        fp = dx * dy
        fn = np.count_nonzero(window - focus)
        return 2 * tp / (2 * tp + fp + fn)

    def get_periphery_types(self):
        rule_func = lambda i: np.count_nonzero(
            self.focus[self.focus == i] * 1,
        ) + np.count_nonzero(
            self.window[self.window == i] * 1,
        )

        return sorted(
            set(np.unique(self.window)) - set(np.unique(self.focus)),
            key=rule_func,
            reverse=False,
        )

In [92]:
img_processed = cv2.GaussianBlur(img_gray, (3, 3), 5)
img_processed = cv2.Canny(img_processed, 100, 200)
# img_processed = rank_filter(img_processed, rank=5, size=3)
plt.imshow(img_processed)

<matplotlib.image.AxesImage at 0x16c7d9990c0>

In [85]:
mask_map, mask_types = get_mask_map(img_processed)
typed_mask = sum(mask_map.values())
plt.imshow(typed_mask)

<matplotlib.image.AxesImage at 0x16c76f85d50>

In [86]:
clasters = []
while len(mask_types):
    central_type = mask_types.pop(0)
    area = AttentionArea(mask_map[central_type], typed_mask)

    periphery_types = area.get_periphery_types()

    while len(periphery_types):
        periphery_type = periphery_types.pop(0)
        new_area = AttentionArea(area.focus + mask_map[periphery_type], typed_mask)

        # add_img = sum([types_map[i] for i in area.get_periphery_types()])
        # show_img = cv2.bitwise_not(
        #     np.sign(area.focus) * 200
        #     + np.sign(types_map[periphery_type]) * 50
        #     + np.sign(add_img) * 15
        # )
        # cv2.rectangle(
        #     show_img,
        #     (area.window_box.x0, area.window_box.y0),
        #     (area.window_box.x1, area.window_box.y1),
        #     200,
        # )
        # cv2.rectangle(
        #     show_img,
        #     (area.focus_box.x0, area.focus_box.y0),
        #     (area.focus_box.x1, area.focus_box.y1),
        #     200,
        # )
        # cv2.imshow('m', show_img)
        # cv2.waitKey(0)

        if (new_area.F1_metric < area.F1_metric * 0.9) or (periphery_type not in mask_types):
            continue

        area = new_area
        periphery_types = area.get_periphery_types()
        mask_types.remove(periphery_type)

    claster = np.sign(area.focus) * (len(clasters) + 1)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    claster = cv2.dilate(claster, kernel, iterations=5)
    clasters.append(claster)

cv2.destroyAllWindows()
# plt.imshow(img_block)

plt.imshow(sum(clasters))

<matplotlib.image.AxesImage at 0x16c76fedab0>

# Recognize

In [64]:
img_mask = clasters[7]
# plt.imshow(mask)
img_cropped = cv2.bitwise_and(img_gray, img_gray, mask=img_mask)
plt.imshow(img_cropped)

<matplotlib.image.AxesImage at 0x16c687745e0>

In [65]:
np.count_nonzero(img_cropped)

1461

In [21]:
import pytesseract

In [52]:
text = pytesseract.image_to_string(img_cropped,lang='rus+eng',config='--psm 3')
print(text)




In [23]:
from pyaspeller import YandexSpeller

In [50]:
speller = YandexSpeller()
print(speller.spelled(text))

Благодарственное письмо-



In [None]:
def _open_transform(image, kernel_size):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    filtered_image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    return filtered_image


# TODO: find deriv from generator
def _hightlight_text(image):
    quality = np.array([np.count_nonzero(_open_transform(image, i)) for i in range(1, 10)])
    d_quality = np.diff(quality)
    best_kernel_size = np.where(d_quality == min(d_quality))[0][0] + 1

    return _open_transform(image, best_kernel_size)


scale = 1
img = paragraphs[0]

# non_zero_coords = cv2.findNonZero(_hightlight_text(img))
# box = cv2.minAreaRect(non_zero_coords)
# angle = box[2]

# h, w = gray_image.shape
# center = (w / 2, h / 2)
# # FIXME: check another rotations
# rotate_M = cv2.getRotationMatrix2D(center, 45 - np.abs(np.abs(angle) - 45), scale)
# aligned_image = cv2.warpAffine(
#     gray_image.copy(), rotate_M, (w, h), cv2.INTER_CUBIC, cv2.BORDER_REPLICATE
# )

# plt.imshow(aligned_image)

# x, y, dx, dy = cv2.boundingRect(cv2.findNonZero(img))
# plt.imshow(img[y:y+dy,x:x+dx])


In [None]:

plt.imshow(img2)
# plt.imshow(mask2)