In [None]:
import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, TrOCRProcessor

In [None]:
model = VisionEncoderDecoderModel.from_pretrained("./model_saved")
# model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-small-stage1").to('cpu')

In [None]:
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-handwritten", "cointegrated/LaBSE-en-ru")
# calling the processor is equivalent to calling the feature extractor
image = Image.open(r"C:\Transformer-Based-OCR\imgs\00e8a9ca1e7c44b3b5b3c6fe7364378b.png").convert("RGB")
pixel_values = processor(image, return_tensors="pt").pixel_values
print(pixel_values.shape)

In [None]:
generated_ids = model.generate(pixel_values)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_text)

EASY OCR CRAFT + TROCR

In [4]:
import os
import numpy as np
import easyocr

old_reader = easyocr.Reader(['ru'])

craft_result = old_reader.detect(r"C:\shiftlab_easy_ocr\test\1.jpg")

print(craft_result)

([[[161, 595, 177, 219], [71, 681, 217, 262], [121, 251, 271, 307], [350, 432, 278, 310], [481, 605, 275, 311], [121, 185, 309, 345], [195, 683, 308, 349], [35, 51, 353, 385], [241, 313, 349, 387], [354, 384, 350, 382], [396, 506, 350, 380], [122, 632, 386, 436], [37, 53, 407, 457], [213, 547, 427, 471], [88, 197, 468, 513], [227, 357, 471, 511], [370, 402, 478, 508], [446, 514, 476, 506], [537, 647, 476, 512], [84, 226, 510, 542], [326, 658, 506, 536], [120, 164, 548, 572], [180, 224, 546, 576], [233, 269, 549, 569], [325, 619, 527, 572], [632, 676, 538, 562], [308, 474, 562, 594], [494, 528, 564, 596], [539, 676, 560, 592], [88, 112, 584, 610], [120, 166, 582, 606], [180, 220, 580, 610], [233, 273, 583, 603], [314, 528, 592, 624], [548, 612, 588, 620], [33, 53, 603, 629], [89, 107, 609, 635], [122, 254, 608, 640], [398, 430, 622, 646], [452, 550, 620, 646], [99, 259, 637, 675], [313, 507, 643, 681], [520, 552, 650, 674], [560, 648, 648, 676], [84, 670, 670, 707], [88, 122, 706, 736],

In [50]:
import cv2
import easyocr
import numpy as np
import matplotlib.pyplot as plt
from transformers import VisionEncoderDecoderModel, TrOCRProcessor

class TrOCR():
    def __init__(self) -> None:
        self.easy_craft = easyocr.Reader(['ru'])
        self.model = VisionEncoderDecoderModel.from_pretrained("./doc2text/weights/model_saved")
        self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-handwritten",
                                                         "cointegrated/LaBSE-en-ru")
        
    def generate(self, image_path:str) -> list:
        """
        Will return:
        [
        ([[x_min, x_max, y_min, y_max],...], "predict", conf?),
        ...
        ]
        """
        #open image with russian path(if you don't have cyr symbols, just use img = cv2.read(image_path))
        with open(image_path, "rb") as f:
            chunk = f.read()
            chunk_arr = np.frombuffer(chunk, dtype=np.uint8)
            img = cv2.imdecode(chunk_arr, cv2.IMREAD_UNCHANGED)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        #get bboxes and small images according to them by using a craft_EasyOCR
        cropped_images = []
        bboxes = self.easy_craft.detect(image_path)
        for box in bboxes[0][0]:
            x_min, x_max, y_min, y_max = box
            cropped_image = img[y_min:y_max, x_min:x_max]
            cropped_images.append(cropped_image)
            break

        #TrOCR model
        pixel_values = self.processor(cropped_images, return_tensors="pt").pixel_values
        generated_ids = self.model.generate(pixel_values)
        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        print(generated_text)

        
model = TrOCR()



inference_file.py

In [3]:
from TrOCR_inference import TrOCR

model = TrOCR(device='cuda')



In [4]:
model.generate(r"C:\shiftlab_easy_ocr\test\2.jpg")

['эталю', 'Министерство нефтяной промышлсниости СССР', 'ИВВ.', '5045', 'У правление полевой', 'промысловой', 'геофизики', 'Трест, ТАТНБФГЕГЕФФИЭИКА', 'Бугульмиская промыслово-теофизическаи контора', 'Боковой,', 'индукционный наротаж', 'СКВ', 'Же', 'ИСОО', 'УБР', 'Данакоейские', 'РИТС', 'Мо', 'Площаль', 'Илькееваская', 'Ат', 'февдаля', '1977', 'БК', 'НК.', 'тип', 'аппаратуры. БК-7', 'тип', 'аппаратуры. ПМК-2 ф', '30ня', '1.21 1', 'зона', 'Ф 0. 75', 'Забой', '1742', 'Альтитула', 'стола', 'ротора', '208.05', 'И.', 'СКВ.', "ИК'У илинение", '20', 'г.П.', '1720', 'И.', '1.23 м', 'башмак', 'коня.', '010', 'добавки', 'Раствор глинистй', 'вязкость', '32', 'сек.', 'Ул.', 'вес', '1.22', 'Ул-сопр. р-ра', '0.9', 'ОМ', 'Примечание;', 'Маспитаб глубин', '1:200', 'Начальник партны', 'Митиласов', 'Оператор упр.', 'из:ят.', 'полнграф.', 'кннж.', 'торг. зак. Ме', '857. т. 3000.']
