In [1]:
import paddleocr
import requests
import numpy as np
import re
import hashlib
import cv2

In [34]:
# One PaddleOCR engine per supported language, keyed by its language code.
# Every engine is built with the same CPU configuration (use_gpu=False).
paddle_dict = {
    lang_code: paddleocr.PaddleOCR(
        lang=lang_code,
        use_gpu=False,
        use_angle_cls=True,
        use_dilation=True,
        use_tensorrt=True,
        enable_mkldnn=True,
    )
    for lang_code in ("en", "ru")
}

Namespace(benchmark=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='/home/sefa/.paddleocr/2.2.1/ocr/cls/ch_ppocr_mobile_v2.0_cls_infer', cls_thresh=0.9, cpu_threads=10, det=True, det_algorithm='DB', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='/home/sefa/.paddleocr/2.2.1/ocr/det/en/en_ppocr_mobile_v2.0_det_infer', det_sast_nms_thresh=0.2, det_sast_polygon=False, det_sast_score_thresh=0.5, drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e2e_pgnet_mode='fast', e2e_pgnet_polygon=True, e2e_pgnet_score_thresh=0.5, e2e_pgnet_valid_set='totaltext', enable_mkldnn=True, gpu_mem=500, help='==SUPPRESS==', image_dir=None, ir_optim=True, label_list=['0', '180'], lang='en', layout_path_mod

In [35]:
# NOTE(review): not referenced by any cell visible in this notebook export;
# presumably a size limit (in pixels?) for input images -- confirm or remove.
IMAGE_SIZE_THRESHOLD = 1300

In [53]:
def read_from_image(image_path):
    """Extract the text of an image using every OCR engine in ``paddle_dict``.

    Text boxes are detected once (with the default-language engine), then each
    language engine recognises those same boxes.  For every box, the language
    with the highest score wins if it beats the runner-up score by more than
    ``score_margin``; otherwise the image's "natural" language (the one that
    recognised the most boxes) is used.  This keeps, e.g., an English ``D``
    that the Russian model scores slightly higher as a Cyrillic letter from
    flipping the language of a mostly-English image, while still allowing a
    clearly foreign word to come through.

    Args:
        image_path: Path of the image file; it is loaded with ``cv2.imread``.

    Returns:
        str: The selected words joined by single spaces and stripped.  Boxes
        are ordered by the (y, x) of their first corner, so rotated or
        vertical text is not ordered specially.  A box whose selected
        language produced no text contributes an empty word (which can show
        up as a double space).  Returns ``''`` when no text box is detected.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    default_lang = 'en'    # engine used for detection; also wins word-count ties
    score_margin = 0.25    # minimum lead of the best score over the runner-up
    missing = ('', 0)      # (text, score) used when a language has no result for a box

    def results_of(recog, lang):
        """Return the result list of ``lang``, treating ``None`` as empty."""
        results = recog[lang]
        return [] if results is None else results

    def index_by_box(results):
        """Map ``str(box)`` -> ``(text, score)``, keeping the first result per box."""
        lookup = {}
        for result in results:
            # result[0] is the box, result[1] the (text, score) tuple.
            lookup.setdefault(str(result[0]), result[1])
        return lookup

    def pick_natural_lang(recog):
        """Return the language that recognised the most boxes.

        If the two largest counts differ by at most one, the result is
        ``default_lang`` (when that language is present).  Returns ``None``
        when ``recog`` holds no language at all.
        """
        names = list(recog.keys())
        if not names:
            return None
        if len(names) == 1:
            print(f"detected only {names[0]} language")
            return names[0]
        counts = np.array([len(results_of(recog, name)) for name in names])
        top = int(counts.argmax())
        runner_up = np.delete(counts, top).max()
        if counts[top] - runner_up <= 1 and default_lang in recog:
            return default_lang
        return names[top]

    langs = list(paddle_dict.keys())

    img_arr = cv2.imread(image_path)
    if img_arr is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise ValueError(f"Could not read image from {image_path!r}")

    # Detection only.  Per the original author's note, the detection model
    # shipped with the 'en' engine is generic, so it is used for all languages.
    boxes = paddle_dict[default_lang].ocr(
        img_arr, det=True, rec=False, cls=True, boxes_elapse=False
    )
    if boxes is None or len(boxes) == 0:
        return ''

    # Reading order: by the first corner's y, then x.
    sorted_boxes = sorted(boxes, key=lambda corners: (corners[0][1], corners[0][0]))

    # `boxes_elapse` is (boxes as float32, elapsed time).  NOTE(review): per the
    # original comments this is a parameter added to the local paddleocr build
    # so that recognition can reuse externally supplied boxes -- confirm.
    boxes_elapse = (np.array(sorted_boxes, dtype=np.float32), 0)

    recog = {}
    for lang in langs:
        recog[lang] = paddle_dict[lang].ocr(
            img_arr, det=True, rec=True, cls=True, boxes_elapse=boxes_elapse
        )

    lookups = {lang: index_by_box(results_of(recog, lang)) for lang in langs}

    natural_lang = pick_natural_lang(recog)
    print(f"Natural lang : {natural_lang}")

    words = []
    for box in sorted_boxes:
        key = str(box)
        # One (text, score) per language, always aligned with `langs`.
        by_lang = {lang: lookups[lang].get(key, missing) for lang in langs}
        scores = np.array([by_lang[lang][1] for lang in langs], dtype=float)

        best = int(scores.argmax())
        others = np.delete(scores, best)
        # With a single language (or no other result) the runner-up score is 0.
        second_score = others.max() if others.size else 0.0

        if natural_lang is None or scores[best] - second_score > score_margin:
            winner = langs[best]
        else:
            winner = natural_lang
        words.append(by_lang[winner][0])

    return ' '.join(words).strip()

In [54]:
# Directory holding the sample images.  It is joined to file names by plain
# string concatenation, so the trailing "/" is required.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
img_pth_base = "/home/sefa/Downloads/ocr-problem-update/"

In [55]:
# Run the multi-language OCR on this sample; the returned text is the cell's output.
img_path = "f942e8626c9209d97dfc320d78faf31d7b94a9ab6ff7565c8b99af8cba410de69135fe04590bf1507d6a9dd716ea93b46c1d88b95131c58f60c8a9feaf6f42af.jpg"
img_full_path = f"{img_pth_base}{img_path}"
read_from_image(img_full_path)

[2021/09/07 02:46:01] root DEBUG: dt_boxes num : 4, elapse : 0
[2021/09/07 02:46:01] root DEBUG: cls num  : 4, elapse : 0.01679086685180664
[2021/09/07 02:46:01] root DEBUG: rec_res num  : 4, elapse : 0.04561448097229004
[2021/09/07 02:46:01] root DEBUG: dt_boxes num : 4, elapse : 0
[2021/09/07 02:46:01] root DEBUG: cls num  : 4, elapse : 0.019659757614135742
[2021/09/07 02:46:01] root DEBUG: rec_res num  : 4, elapse : 0.05081462860107422
Natural lang : en


'Karides mi? Kalamar mi kil kya "keyf .e bak"'

In [56]:
# Run the multi-language OCR on this sample; the returned text is the cell's output.
img_path = "f0bbd264ea8c916fc20b4b73bbadb0bd371eaced144db0a4e3cd32a0bf4a8c279f0d7e7509642c5641d4dbc178c90184436ec560cf4cfbba48c14942a0a684a3.jpg"
img_full_path = f"{img_pth_base}{img_path}"
read_from_image(img_full_path)

[2021/09/07 02:46:01] root DEBUG: dt_boxes num : 4, elapse : 0
[2021/09/07 02:46:01] root DEBUG: cls num  : 4, elapse : 0.01837325096130371
[2021/09/07 02:46:01] root DEBUG: rec_res num  : 4, elapse : 0.05168008804321289
[2021/09/07 02:46:01] root DEBUG: dt_boxes num : 4, elapse : 0
[2021/09/07 02:46:01] root DEBUG: cls num  : 4, elapse : 0.018820524215698242
[2021/09/07 02:46:01] root DEBUG: rec_res num  : 4, elapse : 0.05468606948852539
Natural lang : en


'okulgDongs kiLikya keyfine bak"'

In [57]:
# Run the multi-language OCR on this sample; the returned text is the cell's output.
img_path = "b855f4063180cda1fa9a41229c167a26731e4c648572aff255cfeeb1517ded999be2e46fffe218c3ec715c9a0bcd8bd39794c4e48104165d639a410a503e6b1d.png"
img_full_path = f"{img_pth_base}{img_path}"
read_from_image(img_full_path)

[2021/09/07 02:46:01] root DEBUG: dt_boxes num : 17, elapse : 0
[2021/09/07 02:46:02] root DEBUG: cls num  : 17, elapse : 0.07623028755187988
[2021/09/07 02:46:02] root DEBUG: rec_res num  : 17, elapse : 0.21207571029663086
[2021/09/07 02:46:02] root DEBUG: dt_boxes num : 17, elapse : 0
[2021/09/07 02:46:02] root DEBUG: cls num  : 17, elapse : 0.0851292610168457
[2021/09/07 02:46:02] root DEBUG: rec_res num  : 17, elapse : 0.20911002159118652
Natural lang : en


'ODUL intel UNIVERSITY #4 700TL 1.250TL HAVUZU BIRIiNCILiK #5350TL 1.000TL #6 350TL IKINCILiK #7 250TL 850TL UCUNCOLOK #8 250TL  Oozan'

In [58]:
# Run the multi-language OCR on this sample; the returned text is the cell's output.
img_path = "jlvkto4khv0z.jpg"
img_full_path = f"{img_pth_base}{img_path}"
read_from_image(img_full_path)

[2021/09/07 02:46:02] root DEBUG: dt_boxes num : 14, elapse : 0
[2021/09/07 02:46:02] root DEBUG: cls num  : 14, elapse : 0.08517694473266602
[2021/09/07 02:46:03] root DEBUG: rec_res num  : 14, elapse : 0.25046253204345703
[2021/09/07 02:46:03] root DEBUG: dt_boxes num : 14, elapse : 0
[2021/09/07 02:46:03] root DEBUG: cls num  : 14, elapse : 0.07745909690856934
[2021/09/07 02:46:03] root DEBUG: rec_res num  : 14, elapse : 0.2460954189300537
Natural lang : ru


'71:35 0325. Введитекод ИбегБЗбб чтобы подтвердить свой номер 6931 привет от Uberl Bведите 829б чтобы подтвердить номер мобильного телефона Код подтверждения Ибег зЭУб Мом Б Type an SMS message SMS і'