In [1]:
import os
import numpy as np
import easyocr
from tqdm.notebook import trange
from doc2text.reader import Reader
from metric.metric import cosine_similarity, cer

# Baseline: EasyOCR for Russian, constructed with only the language list.
def_reader = easyocr.Reader(['ru'])

# Locations passed to EasyOCR for the custom 'ru_custom' recognition network.
# NOTE(review): absolute, machine-specific paths — the notebook only runs where
# this directory layout exists.
custom_model_dir = 'C:/shiftlab_easy_ocr/doc2text/weights/easyOCR/model'
custom_network_dir = 'C:/shiftlab_easy_ocr/doc2text/weights/easyOCR/user_network'

# Second EasyOCR instance: same language, but pointed at the custom recognition
# network and the local directories above, with download_enabled=False.
old_reader = easyocr.Reader(
    ['ru'],
    recog_network='ru_custom',
    model_storage_directory=custom_model_dir,
    user_network_directory=custom_network_dir,
    download_enabled=False,
    gpu=True,
)

# Third candidate: the project's own doc2text Reader, built with its defaults.
new_reader = Reader()

# Load the reference transcriptions; line k is later compared against the
# recognition result for image "<k+1>.jpg".
# The trailing "\n" is stripped from each line: readlines() would keep it on
# every line except (possibly) the last one, so the line terminator would be
# scored as part of the reference text and samples would be treated unevenly.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches how the text files were saved.
with open("./test/texts.txt", "r") as texts:
    y_true = [line.rstrip("\n") for line in texts]

# Load the pre-computed "apple" model transcriptions the same way, so that
# apple[k] lines up with y_true[k].
with open("./test/texts_apple.txt", "r") as texts_apple:
    apple = [line.rstrip("\n") for line in texts_apple]

# Per-image metric accumulators: one cosine-similarity list and one `cer` list
# for each of the four compared systems (default EasyOCR, custom-network
# EasyOCR, the new doc2text reader, and the pre-computed "apple" texts).
scores_def = []
scores_old = []
scores_new = []
scores_apple = []
cer_scores_def = []
cer_scores_old = []
cer_scores_new = []
cer_scores_apple = []
directory = 'test'

# Count the images explicitly instead of assuming the directory holds exactly
# two non-image entries: any extra file or folder (e.g. .ipynb_checkpoints,
# Thumbs.db) would otherwise make the loop request an image that does not exist.
n_images = sum(name.lower().endswith(".jpg") for name in os.listdir(directory))

# Fail before the (slow) recognition starts if a reference list is too short,
# rather than with an IndexError part-way through the run.
if n_images > min(len(y_true), len(apple)):
    raise ValueError(
        "found %d images in %r but only %d ground-truth lines and %d apple lines"
        % (n_images, directory, len(y_true), len(apple))
    )

for k in trange(n_images):
    # Images are expected to be named 1.jpg, 2.jpg, ... matching the line order
    # of the reference text lists.
    filename = str(k + 1) + ".jpg"
    print(filename)
    file_path = os.path.join(directory, filename)

    #model eval
    # The readtext() results are indexed with [1] to get each fragment's text;
    # fragments are joined with spaces and lower-cased.
    def_result = def_reader.readtext(file_path)
    def_str = ' '.join(word[1] for word in def_result).lower()
    old_result = old_reader.readtext(file_path)
    old_str = ' '.join(word[1] for word in old_result).lower()
    # doc2text() returns an indexable result whose first element is the text.
    new_result = new_reader.doc2text(file_path)
    new_str = new_result[0].lower()
    # Lower-case the apple transcription once and reuse it below.
    apple_str = apple[k].lower()
    print("default easyOCR:\n", def_str)
    print("trained easyOCR:\n", old_str)
    print("NEW model:\n", new_str)
    print("apple model:\n", apple_str)
    print("y_true:\n", y_true[k])

    #metric
    # NOTE(review): predictions are lower-cased but y_true[k] is not — confirm
    # the ground-truth file is already lower-case, otherwise case differences
    # are counted as errors for every system.
    for prediction, cosine_list, cer_list in (
        (def_str, scores_def, cer_scores_def),
        (old_str, scores_old, cer_scores_old),
        (new_str, scores_new, cer_scores_new),
        (apple_str, scores_apple, cer_scores_apple),
    ):
        cosine_list.append(cosine_similarity(y_true[k], prediction))
        cer_list.append(cer(y_true[k], prediction))

# Summary: print the mean of every per-image score list, cosine similarity
# first and then cer, in the same system order for both metrics.
print("\n")
for label, values in (
    ("mean default EasyOCR cosine_similarity", scores_def),
    ("mean our EasyOCR cosine_similarity", scores_old),
    ("mean NEW model cosine_similarity", scores_new),
    ("mean apple model cosine_similarity", scores_apple),
    ("mean default EasyOCR cer", cer_scores_def),
    ("mean our EasyOCR cer", cer_scores_old),
    ("mean NEW model cer", cer_scores_new),
    ("mean apple model cer", cer_scores_apple),
):
    print(label, np.mean(values))

recognizer weights has loaded from c:\shiftlab_easy_ocr\doc2text\weights/ocr_transformer_4h2l_simple_conv_64x256.pt


  0%|          | 0/10 [00:00<?, ?it/s]

1.jpg
default easyOCR:
 татнефтегеофизика апьметьевская промыслов-геофизическмя кр^ заказчик нчер рц с-3 скв: &:*{0820 площадь мчннцб@еб скс & 5 дата 18 19.02.73 определенне высоты подъема цемента 5 ультразвуковым методом прибор нкц-! &а 75 зонд 1 2 5 п забой 1787 условные обозначения  кривых скв 214 мм ер-время пробе2о_васн опъ цзлччы/2.9 92 прцепнцка 0 кол [168 мм 2нц-ампжцпц;& важн & б кол. {787 м 7, каланне стап" 1781 м: знр-ампл: волн по парафе условные обозначения качества цементнжа х "орошее] сцепленш: цеш кахоннац 2хх часпцчное 3 ххх <л.: 6 састав кол-ва тампон. смеси 20 п ц2л 20 777 емесц дата и время конца заливки /70.75г 3 {5 99 <2€ дата и время замера {9.4.73 8 ~@с достоян-ая]вгемеян тед скорость25шм/час прим цемент наъл 220 м см , #ки 22 2 21 рвц- 9а чепья 6 кон; 340 м м-6 1 500 4; 20 нач  партии тцсмцпочнов у%
trained easyOCR:
 татнефтегеофизика альметьевскаяпромыслово,гсофизическаяк,ра заказчик аубр рumс,3 скв- n}10820}площадь}миннибаевская  дата 18 19-02-13 определение}

In [3]:
# Dump the raw per-image score lists (one value per test image) for each
# system. These are the unaggregated lists, so the labels must not say "mean":
# the cer labels previously did, which mislabelled the printed lists.
print("default EasyOCR cosine_similarity", scores_def)
print("our EasyOCR cosine_similarity", scores_old)
print("NEW model cosine_similarity", scores_new)
print("apple model cosine_similarity", scores_apple)

print("default EasyOCR cer", cer_scores_def)
print("our EasyOCR cer", cer_scores_old)
print("NEW model cer", cer_scores_new)
print("apple model cer", cer_scores_apple)

default EasyOCR cosine_similarity [0.4142073802057458, 0.42332020977033447, 0.27595986330233996, 0.35450928197437176, 0.03952847075210474, 0.27493735864938834, 0.4720991494481081, 0.065033247714309, 0.11704114719613058, 0.0603768543106847]
our EasyOCR cosine_similarity [0.2764043281905089, 0.21761921176845594, 0.2581988897471611, 0.3419583072115192, 0.14999999999999997, 0.18515669296481727, 0.0, 0.12262786789699316, 0.18257418583505539, 0.05692384414639651]
NEW model cosine_similarity [0.4173700964047933, 0.2626836413212474, 0.37905238362839516, 0.3029391884499828, 0.13693063937629152, 0.3863636835205434, 0.2050225600506816, 0.2974059387397313, 0.11605177063713189, 0.08642883444148555]
apple model cosine_similarity [0.6593067210171497, 0.6996859578128172, 0.42438164224022457, 0.4980011342020937, 0.3730019232961255, 0.6940491774014498, 0.5701395457735293, 0.4360407996361905, 0.5677749739576688, 0.23505544201393797]
mean default EasyOCR cer [0.34575835475578404, 0.2763744427934621, 0.392