In [11]:
from pathlib import Path
from course_intro_ocr_t1.data import MidvPackage
from tqdm import tqdm
import numpy as np
import cv2


In [12]:
DATASET_PATH = Path().absolute().parent.parent / 'midv500' / 'midv500_compressed'
assert DATASET_PATH.exists(), DATASET_PATH.absolute()


In [13]:
# Собираем список пакетов (MidvPackage) 
data_packs = MidvPackage.read_midv500_dataset(DATASET_PATH)
len(data_packs), type(data_packs[0])

(50, course_intro_ocr_t1.data.MidvPackage)

In [14]:
class Cropper:
    def __init__(self):
        self.sift = cv2.SIFT_create()
        
    def preprocess_image(self, img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return gray_img
        
    def detect_compute_keypoints_descriptors(self, img):
        keypoints = self.sift.detect(img, None)
        keypoints, descriptors = self.sift.compute(img, keypoints)
        return keypoints, descriptors
        
    def match_keypoints(self, template_dscs, target_dscs):
        matches = cv2.FlannBasedMatcher(dict(algorithm=0, trees=5), dict(checks=50)).knnMatch(template_dscs, target_dscs, k=2)
        distances = np.array([m.distance for m, n in matches])
        matches = [matches[i][0] for i in np.where(distances < 0.5 * np.roll(distances, -1))[0]]
        return matches
        
    def find_homography(self, query_pts, train_pts):
        homo, _ = cv2.findHomography(query_pts, train_pts, cv2.RANSAC, 5.0)
        return homo
        
    def transform_angles(self, homo, template_img):
        template_angles = np.array([[0, 0], [len(template_img[0]), 0], [len(template_img[0]), len(template_img)], [0, len(template_img)]], dtype=np.float32)[:, None]
        transformed_angles = cv2.perspectiveTransform(template_angles, homo)
        return transformed_angles
        
    def normalize_coordinates(self, angles, target_img):
        return angles / np.array([len(target_img[0]), len(target_img)])
        
    def angles(self, template_img, target_img):
        template_gray = self.preprocess_image(template_img)
        target_gray = self.preprocess_image(target_img)
        template_kpts, template_dscs = self.detect_compute_keypoints_descriptors(template_gray)
        target_kpts, target_dscs = self.detect_compute_keypoints_descriptors(target_gray)
        matches = self.match_keypoints(template_dscs, target_dscs)
        homo = self.find_homography(np.array([template_kpts[m.queryIdx].pt for m in matches], dtype=np.float32)[:, None],
                                       np.array([target_kpts[m.trainIdx].pt for m in matches], dtype=np.float32)[:, None])
        transformed_angles = self.transform_angles(homo, template_img)
        normalized_angles = self.normalize_coordinates(transformed_angles, target_img)
        return normalized_angles

    def process_data_packs(self, data_packs):
        results_dict = {}
        for dp in tqdm(data_packs):
            for i in range(len(dp)):
                if dp[i].is_test_split():
                    try:
                        results_dict[dp[i].unique_key] = self.angles(np.array(dp.template_item.image), np.array(dp[i].image))
                    except Exception as exc:
                        print(exc)
        return results_dict


In [15]:
cropper = Cropper()
results_dict = cropper.process_data_packs(data_packs)
output_dict = {key: arr.squeeze() for key, arr in results_dict.items()}

 50%|█████     | 25/50 [09:31<10:05, 24.20s/it]

OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\matmul.dispatch.cpp:550: error: (-215:Assertion failed) scn + 1 == m.cols in function 'cv::perspectiveTransform'



 96%|█████████▌| 48/50 [26:50<03:55, 117.96s/it]

OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\matmul.dispatch.cpp:550: error: (-215:Assertion failed) scn + 1 == m.cols in function 'cv::perspectiveTransform'



100%|██████████| 50/50 [31:18<00:00, 37.57s/it] 


## Узнаем точность

In [16]:
output_dict

{'01_alb_id|ground_truth|CA|CA01_26.json': array([[0.05510424, 0.33678593],
        [0.84211548, 0.31808861],
        [0.86882243, 0.59175728],
        [0.08149398, 0.61886978]]),
 '01_alb_id|ground_truth|CA|CA01_27.json': array([[0.07105584, 0.34295792],
        [0.83816678, 0.32471367],
        [0.86455946, 0.59250301],
        [0.09599744, 0.61905407]]),
 '01_alb_id|ground_truth|CA|CA01_28.json': array([[0.08542291, 0.35124588],
        [0.83783576, 0.33378422],
        [0.86391378, 0.59590126],
        [0.10968011, 0.621759  ]]),
 '01_alb_id|ground_truth|CA|CA01_29.json': array([[0.08869185, 0.35223263],
        [0.83965658, 0.33720261],
        [0.86647244, 0.59860241],
        [0.1132111 , 0.62324764]]),
 '01_alb_id|ground_truth|CA|CA01_30.json': array([[0.07679771, 0.33745579],
        [0.85077989, 0.32396103],
        [0.8785456 , 0.59278896],
        [0.10260929, 0.61621571]]),
 '01_alb_id|ground_truth|CS|CS01_26.json': array([[0.11950849, 0.41244068],
        [0.93087063, 0.4

In [17]:
from course_intro_ocr_t1.metrics import dump_results_dict, measure_crop_accuracy

In [18]:
dump_results_dict(output_dict, Path() / 'pred.json')

In [19]:
acc = measure_crop_accuracy(
    Path() / 'pred.json',
    Path() / 'gt.json'
)

In [20]:
print("Точность кропа: {:1.4f}".format(acc))

Точность кропа: 0.9544
