In [None]:
# Install the Python dependencies into the environment of the running kernel.
# The explicit %pip magic is used because a bare `pip ...` line only works
# through automagic, which is applied to single-line cells only.
%pip install torch torchvision torchaudio 'git+https://github.com/facebookresearch/detectron2.git' pytesseract

In [None]:
# System package providing the tesseract binary used by pytesseract.
# -y answers the confirmation prompt so the cell cannot stall waiting for input.
!apt-get install -y tesseract-ocr

In [None]:
import numpy as np
import cv2

# detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog

# Detectron2 config
# Model-zoo entry used for both the network config and the pretrained weights.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

# The predictor is built from the node returned by get_config(); a separate
# get_cfg() result was previously assigned and overwritten before use, so that
# call is dropped.
cfg = model_zoo.get_config(MODEL_ZOO_CONFIG)
# Keep every detection scoring at least 0.1.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)

predictor = DefaultPredictor(cfg)

def align_document(image, mask):
    """Warp `image` so the largest region of `mask` becomes an upright rectangle.

    The largest external contour of `mask` is enclosed in its minimum-area
    rotated rectangle, and the four corners of that rectangle are mapped onto
    an axis-aligned rectangle with a perspective transform.

    Args:
        image: Image to warp (handed to ``cv2.warpPerspective``).
        mask: Single-channel mask in the same pixel coordinates as `image`
            (handed to ``cv2.findContours``).

    Returns:
        The warped image with output size ``(width, height)``, where each
        dimension is the longer of the two opposite sides of the rectangle.

    Raises:
        ValueError: If `mask` contains no contour.
    """
    # [-2] selects the contour list whether findContours returns two or three
    # values (the same convention find_contours in this notebook uses).
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) == 0:
        raise ValueError("mask contains no contours")

    contour = max(contours, key=cv2.contourArea)
    rect = cv2.minAreaRect(contour)
    # np.int0 was removed in NumPy 2.0; it was an alias of np.intp, so this
    # cast keeps the same integer truncation of the corner coordinates.
    box = cv2.boxPoints(rect).astype(np.intp)

    # Sort the corners by their angle around the centroid so that they form a
    # consistent cycle, which is then mapped to top-left, top-right,
    # bottom-right, bottom-left of the output rectangle.
    center = np.mean(box, axis=0)
    corners = sorted(box, key=lambda corner: np.arctan2(corner[1] - center[1], corner[0] - center[0]))

    width = int(max(np.linalg.norm(corners[0] - corners[1]), np.linalg.norm(corners[2] - corners[3])))
    height = int(max(np.linalg.norm(corners[1] - corners[2]), np.linalg.norm(corners[3] - corners[0])))
    aligned_corners = np.float32([[0, 0], [width, 0], [width, height], [0, height]])

    M = cv2.getPerspectiveTransform(np.float32(corners), aligned_corners)

    return cv2.warpPerspective(image, M, (width, height))

def rotate_image_if_needed(image):
    """Return `image` in landscape orientation.

    An image that is taller than it is wide is rotated 90 degrees clockwise;
    any other image is returned unchanged (the same object).
    """
    rows, cols = image.shape[:2]
    if rows <= cols:
        return image
    return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

def get_card(image):
    """Detect card-like objects in `image` and return them cropped and aligned.

    Runs the module-level `predictor`, keeps the instances whose predicted
    class is 'book', crops each one to its bounding box, straightens it with
    `align_document` and turns it to landscape with `rotate_image_if_needed`.

    Args:
        image: Image handed to `predictor` and sliced as ``image[y1:y2, x1:x2]``.

    Returns:
        list: One aligned image per usable 'book' detection; empty when there
        is none.
    """
    instances = predictor(image)["instances"].to("cpu")
    classes = instances.pred_classes.numpy()
    boxes = instances.pred_boxes.tensor.numpy()
    masks = instances.pred_masks.numpy()
    # NOTE(review): 'book' is presumably used as the closest available class
    # to a document/card -- confirm.
    book_class_index = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes.index('book')
    is_book = classes == book_class_index

    result = []
    for box, mask in zip(boxes[is_book], masks[is_book]):
        x1, y1, x2, y2 = box.astype(int)
        mask_crop = (mask[y1:y2, x1:x2] * 255).astype(np.uint8)
        # A zero-area box or an all-background mask has no contour to align;
        # skip it instead of letting one bad detection abort the whole image.
        if mask_crop.size == 0 or not mask_crop.any():
            continue
        aligned_image = align_document(image[y1:y2, x1:x2], mask_crop)
        result.append(rotate_image_if_needed(aligned_image))
    return result

In [118]:
import numpy as np
import cv2

# Fill colour (white) that rotate_image uses for pixels outside the source image.
color_white = (255, 255, 255)

# 13x5 rectangular kernel used for the blackhat transform and the first closing.
rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (13, 5))
# 21x21 square kernel used for the second closing in apply_closing_operations.
sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 21))


def smooth_image(gray):
    """Blur `gray` with a 3x3 Gaussian, then apply a blackhat transform.

    The blackhat uses the module-level `rectKernel` as its structuring element.
    """
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    black_hat = cv2.morphologyEx(blurred, cv2.MORPH_BLACKHAT, rectKernel)
    return black_hat


# https://pythongeeks.org/sobel-and-scharr-operator-in-opencv/
def compute_gradient(black_hat):
    """Return the absolute x-gradient of `black_hat`, min-max scaled to uint8.

    Args:
        black_hat: Image handed to ``cv2.Sobel``.

    Returns:
        uint8 array with the gradient magnitudes rescaled to 0..255. When the
        gradient is constant everywhere, an all-zero array is returned.
    """
    # ksize=-1 asks cv2.Sobel for the Scharr kernel (see the link above).
    gradX = np.absolute(cv2.Sobel(black_hat, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1))
    (minVal, maxVal) = (np.min(gradX), np.max(gradX))
    value_range = maxVal - minVal
    if value_range == 0:
        # Flat gradient: scaling would divide 0 by 0 and cast NaNs to uint8.
        return np.zeros(gradX.shape, dtype="uint8")
    return (255 * ((gradX - minVal) / value_range)).astype("uint8")


# Smear the text so that it merges into a single blob
def apply_closing_operations(gradX, rectKernel, sqKernel):
    """Turn the gradient image into a binary mask of merged text regions.

    Steps: morphological closing with `rectKernel`, Otsu binarisation,
    a second closing with `sqKernel`, then four erosion iterations.
    """
    closed = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)
    _, binary = cv2.threshold(closed, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    merged = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, sqKernel)
    eroded = cv2.erode(merged, None, iterations=4)
    return eroded


# The smearing may have joined the text to the image borders, so clear 5% on the left and on the right
def remove_border_pixels(thresh, image):
    """Zero the leftmost and rightmost 5% of columns of `thresh`.

    The margin is computed from the width of `image`. `thresh` is modified
    in place and also returned.
    """
    full_width = image.shape[1]
    margin = int(full_width * 0.05)
    thresh[:, :margin] = 0
    thresh[:, full_width - margin:] = 0
    return thresh


def find_contours(thresh):
    """Return the external contours of `thresh`, largest area first.

    `thresh` itself is not modified; a copy is handed to OpenCV.
    """
    found = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Second-to-last element is the contour list for both 2- and 3-value returns.
    contours = list(found[-2])
    contours.sort(key=cv2.contourArea, reverse=True)
    return contours


def resize_image(img, height):
    """Resize `img` to the given `height`, scaling the width by the same factor."""
    src_height, src_width = img.shape[:2]
    width = int(src_width * height / src_height)
    return cv2.resize(img, (width, height))


def get_mrz_image(img):
    """Locate the MRZ strip in `img` and return it as a cropped image.

    The image is resized to a height of 600 px and converted to grayscale.
    A text mask is then built (blackhat, x-gradient, closing, border
    clean-up) and its contours are handed to `extract_roi`.
    """
    resized = resize_image(img, 600)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    gradient = compute_gradient(smooth_image(gray))
    text_mask = apply_closing_operations(gradient, rectKernel, sqKernel)
    text_mask = remove_border_pixels(text_mask, resized)
    return extract_roi(find_contours(text_mask), gray, resized)


def extract_roi(contours, gray, image):
    """Crop the first contour that looks like an MRZ strip out of `image`.

    A contour qualifies when its bounding box is more than 5 times wider than
    tall and spans more than 75% of the width of `gray`. The box is padded by
    3% of its right/bottom coordinate on every side before cropping.

    Args:
        contours: Contours to examine, in priority order.
        gray: Image whose width is the reference for the 75% test.
        image: Image the region is cropped from.

    Returns:
        A copy of the padded region of `image`.

    Raises:
        UnboundLocalError: If no contour qualifies. This is the exception the
            function has always produced in that case (through an unassigned
            local) and the one `helper` catches, so the type is kept.
    """
    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        ar = w / float(h)
        crWidth = w / float(gray.shape[1])

        if ar > 5 and crWidth > 0.75:
            pX = int((x + w) * 0.03)
            pY = int((y + h) * 0.03)
            # Clamp the padded origin at 0: a negative slice start would be
            # read by NumPy as "from the end" and yield a wrong or empty crop.
            x0, y0 = max(x - pX, 0), max(y - pY, 0)
            # End coordinates may exceed the image; slicing truncates them.
            x1, y1 = x + w + pX, y + h + pY
            return image[y0:y1, x0:x1].copy()

    raise UnboundLocalError("no contour matching the MRZ shape was found")


def rotate_image(mat, angle):
    """Rotate `mat` by `angle` about its centre without cropping.

    The output canvas is enlarged to the bounding box of the rotated image
    and the uncovered area is filled with `color_white`.
    """
    rows, cols = mat.shape[:2]
    center_x, center_y = cols / 2, rows / 2

    transform = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.)

    cos_a = abs(transform[0, 0])
    sin_a = abs(transform[0, 1])

    # Size of the axis-aligned box that contains the rotated image.
    out_w = int(rows * sin_a + cols * cos_a)
    out_h = int(rows * cos_a + cols * sin_a)

    # Shift the transform so the rotated image is centred on the new canvas.
    transform[0, 2] += out_w / 2 - center_x
    transform[1, 2] += out_h / 2 - center_y

    return cv2.warpAffine(
        mat,
        transform,
        (out_w, out_h),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=color_white,
    )


def convert_to_binary(img):
    """Binarise a BGR image with Otsu's threshold.

    The grayscale, blurred image is divided by its own 8x8 dilation
    (scaled by 255) before thresholding.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 225)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    background = cv2.morphologyEx(blurred, cv2.MORPH_DILATE, kernel)
    normalized = cv2.divide(blurred, background, scale=255)
    _, out_binary = cv2.threshold(normalized, 0, 255, cv2.THRESH_OTSU)
    return out_binary

In [119]:
import os
from pytesseract import pytesseract
# Sets the TESSDATA_PREFIX environment variable for this process.
# NOTE(review): hard-coded path; presumably the data for lang='mrz' (requested
# in Back.set_MRZ) has to be placed in this directory beforehand -- confirm.
os.environ['TESSDATA_PREFIX'] = '/content/tesseract_data'

class Back:
    """Card image whose MRZ strip is located and OCR'd on construction.

    After construction, `MRZ` holds the text returned by Tesseract and
    `_MRZ_image` the binarised strip it was read from.
    """

    def __init__(self, img):
        """Store `img`, then extract the MRZ strip and its text."""
        self._image = img
        self.set_MRZ_image()
        self.set_MRZ()

    def set_MRZ_image(self):
        """Crop the MRZ strip from the stored image and binarise it."""
        mrz_strip = get_mrz_image(self._image)
        self._MRZ_image = convert_to_binary(mrz_strip)

    # https://github.com/DoubangoTelecom/tesseractMRZ/
    def set_MRZ(self):
        """Run Tesseract on the binarised strip using the 'mrz' language data."""
        self.MRZ = pytesseract.image_to_string(
            self._MRZ_image,
            lang='mrz',
            config='--psm 6',
        )



In [120]:
import re

def validate_mrz(mrz: str) -> bool:
    """Check that `mrz` starts with three 30-character MRZ lines.

    Each line may contain only ``A-Z``, ``0-9`` and ``<``; the newline after
    a line is optional. Only the beginning of the string is checked, so any
    text following the third line does not affect the result.
    """
    line_triplet = re.compile(r'([A-Z0-9<]{30}\n?){3}')
    return line_triplet.match(mrz) is not None



In [None]:
def helper(img) -> str:
    """Return the MRZ text read from `img`, or 'bad image' on failure.

    Failure here means that building `Back(img)` raised UnboundLocalError.
    """
    try:
        passport = Back(img)
    except UnboundLocalError:
        return 'bad image'
    return passport.MRZ

# Input photo to process.
IMAGE_PATH = '/content/data_set/1.jpg'

img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"could not read image: {IMAGE_PATH}")

images = get_card(img)
mrz = []
for image in images:
    # The card may be upside down, so OCR both the detected orientation and
    # its 180-degree rotation and keep every result that validates.
    for candidate in (image, rotate_image(image, 180)):
        m = helper(candidate)
        if validate_mrz(m):
            mrz.append(m)

if len(mrz) == 0:
    # User-facing message (Russian): "failed to process the image correctly".
    print("не удалось корректно обработать изображение")
else:
    print(mrz[0])