[Unlocking the Power of PaddleOCR](https://medium.com/@vinod-baste/unlocking-the-power-of-paddleocr-4141544f8dba)

In [None]:
class DecMain:
    """Run one OCR evaluation pass and hand the results to the report writer."""

    def __init__(self, image_folder_path, label_file_path, output_file):
        """
        Store the input and output locations used by `run_dec`.

        image_folder_path: Folder holding the images that OCR is run on.
        label_file_path: Ground truth label file with the expected text of the images.
        output_file: Name of the file the evaluation results are written to.
        """
        self.output_file = output_file
        self.label_file_path = label_file_path
        self.image_folder_path = image_folder_path

    def run_dec(self):
        """
        Execute the evaluation steps in order: validate the ground truth file,
        run OCR, compute precision/recall and write the report.

        Returns nothing; the outcome is whatever `CreateSheet.create_sheet`
        produces for `self.output_file`.
        """
        labels_path = self.label_file_path

        # Step 1: let the checker validate (and possibly rewrite) the label
        # file before anything else reads it.
        truth_checker = CheckAndUpdateGroundTruth(labels_path)
        truth_checker.check_and_update_ground_truth_file()

        # Step 2: run OCR over the image folder with detection and recognition
        # switched on and the `cls` option switched off; the result is kept as
        # `df` (a DataFrame, judging by the `to_dict` call below).
        ocr_converter = OcrToDf(
            image_folder=self.image_folder_path,
            label_file=labels_path,
            det=True,
            rec=True,
            cls=False,
        )
        df = ocr_converter.ocr_to_df()

        # Step 3: load the expected texts.
        truth_reader = ReadGroundTruthFile(labels_path)
        ground_truth_data = truth_reader.read_ground_truth_file()

        # Step 4: one dictionary per DataFrame row, i.e. per OCR result.
        ocr_results = df.to_dict(orient="records")

        # Step 5: precision, recall and the number of evaluated samples.
        metrics = CalculateMetrics(ground_truth_data, ocr_results)
        precision, recall, total_samples = metrics.calculate_precision_recall()

        # Step 6: write the per-row results and the summary metrics.
        sheet = CreateSheet(
            dataframe=df,
            precision=precision,
            recall=recall,
            total_samples=total_samples,
            file_name=self.output_file,
        )
        sheet.create_sheet()

# Installation

In [None]:
# !brew install swig
# !python -m pip install paddlepaddle
# !python -m pip install paddleocr

# !pip show paddleocr

In [None]:
# !python -m pip install --upgrade pip

In [3]:
!which python

/Users/velo1/SynologyDrive/GIT_syno/Mac/Netology/OCR/ocr-venv/bin/python


In [27]:
import matplotlib.font_manager as fm
from PIL import ImageFont

# Ask matplotlib's font manager where the Arial font file lives.
font_path = fm.findfont(fm.FontProperties(family="Arial"))

# Load that same font file with Pillow at size 12.
font_size = 12
font = ImageFont.truetype(font=font_path, size=font_size)

# Bare expression so the notebook echoes the resolved path as the cell output.
font_path

'/System/Library/Fonts/Supplemental/Arial.ttf'

In [30]:
!paddleocr --help

usage: paddleocr [-h] [--use_gpu USE_GPU] [--use_xpu USE_XPU]
                 [--use_npu USE_NPU] [--ir_optim IR_OPTIM]
                 [--use_tensorrt USE_TENSORRT]
                 [--min_subgraph_size MIN_SUBGRAPH_SIZE]
                 [--precision PRECISION] [--gpu_mem GPU_MEM] [--gpu_id GPU_ID]
                 [--image_dir IMAGE_DIR] [--page_num PAGE_NUM]
                 [--det_algorithm DET_ALGORITHM]
                 [--det_model_dir DET_MODEL_DIR]
                 [--det_limit_side_len DET_LIMIT_SIDE_LEN]
                 [--det_limit_type DET_LIMIT_TYPE]
                 [--det_box_type DET_BOX_TYPE] [--det_db_thresh DET_DB_THRESH]
                 [--det_db_box_thresh DET_DB_BOX_THRESH]
                 [--det_db_unclip_ratio DET_DB_UNCLIP_RATIO]
                 [--max_batch_size MAX_BATCH_SIZE]
                 [--use_dilation USE_DILATION]
                 [--det_db_score_mode DET_DB_SCORE_MODE]
                 [--det_east_score_thresh DET_EAST_SCORE_THRESH]
        

In [39]:
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Recognise the text on one sample photo, print every detected line and save
# an annotated copy of the image as "result.jpg".
#
# PaddleOCR selects its language model from `lang` (for example `ch`, `en`,
# `fr`, `german`, `korean`, `japan`); this cell uses `ru`.
# The engine needs to be created only once: that downloads the models and
# loads them into memory.
# NOTE(review): `det`, `rec` and `cls` look like per-call options of
# `ocr.ocr()` rather than constructor settings - confirm they have any effect
# here.
ocr = PaddleOCR(
    use_angle_cls=True, use_dilation=True, det=True, rec=True, cls=True, lang="ru"
)
img_path = "./OCR Samples/photo_2023-11-19_13-33-00 (2).jpg"
result = ocr.ocr(img_path, cls=True)

# One entry per input image; an entry may be None when nothing was detected,
# so fall back to an empty list instead of failing on iteration.
for res in result or []:
    for line in res or []:
        print(line)

# Draw the result. Only one image was passed in, so only the first entry is
# relevant; each line has the shape [box, (text, score)].
result = result[0] if result and result[0] else []
image = Image.open(img_path).convert("RGB")
boxes = [box for box, _ in result]
txts = [text for _, (text, _) in result]
scores = [score for _, (_, score) in result]
# `font_path` comes from the font lookup cell and must be defined beforehand.
im_show = draw_ocr(image, boxes, txts, scores, font_path=font_path)
im_show = Image.fromarray(im_show)
im_show.save("result.jpg")

[2023/11/23 10:13:43] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/Users/velo1/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=True, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/Users/velo1/.paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_t

In [14]:
import matplotlib.font_manager as fm
from PIL import ImageFont

# Ask matplotlib's font manager where the Arial font file lives.
font_path = fm.findfont(fm.FontProperties(family="Arial"))

# Load that same font file with Pillow at size 12.
font_size = 12
font = ImageFont.truetype(font=font_path, size=font_size)

# Bare expression so the notebook echoes the resolved path as the cell output.
font_path

'/System/Library/Fonts/Supplemental/Arial.ttf'

In [23]:
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Recognise the text on one sample photo, print every detected line and save
# an annotated copy of the image as "result.jpg".
#
# PaddleOCR selects its language model from `lang` (for example `ch`, `en`,
# `fr`, `german`, `korean`, `japan`); this cell uses `ru`.
# The engine needs to be created only once: that downloads the models and
# loads them into memory.
ocr = PaddleOCR(use_angle_cls=True, lang="ru")
img_path = "./OCR Samples/photo_2023-11-19_13-33-01.jpg"
result = ocr.ocr(img_path, cls=True)

# One entry per input image; an entry may be None when nothing was detected,
# so fall back to an empty list instead of failing on iteration.
for res in result or []:
    for line in res or []:
        print(line)

# Draw the result. Only one image was passed in, so only the first entry is
# relevant; each line has the shape [box, (text, score)].
result = result[0] if result and result[0] else []
image = Image.open(img_path).convert("RGB")
boxes = [box for box, _ in result]
txts = [text for _, (text, _) in result]
scores = [score for _, (_, score) in result]
# `font_path` comes from the font lookup cell and must be defined beforehand.
im_show = draw_ocr(image, boxes, txts, scores, font_path=font_path)
im_show = Image.fromarray(im_show)
im_show.save("result.jpg")

[2023/11/23 09:47:11] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/Users/velo1/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/Users/velo1/.paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_

In [22]:
from paddleocr import PaddleOCR, draw_ocr
import fitz
from PIL import Image
import cv2
import numpy as np

# Recognise the text in a PDF, print every detected line and save one
# annotated image per processed page as "result_page_<idx>.jpg".
#
# PaddleOCR selects its language model from `lang` (for example `ch`, `en`,
# `fr`, `german`, `korean`, `japan`); this cell uses `ru`.
# The engine needs to be created only once: that downloads the models and
# loads them into memory.
# NOTE(review): `page_num=1` presumably limits OCR to the first page of the
# PDF - confirm against the PaddleOCR documentation.
ocr = PaddleOCR(use_angle_cls=True, lang="ru", page_num=1)
img_path = "./OCR Samples/Экоэнерго (СФ).pdf"
result = ocr.ocr(img_path, cls=True)

# One entry per processed page; an entry may be None when nothing was
# detected on that page, so fall back to an empty list.
for res in result or []:
    for line in res or []:
        print(line)

# Render every PDF page to an image array so the detections can be drawn on it.
mat = fitz.Matrix(2, 2)  # 2x zoom in both directions
imgs = []
with fitz.open(img_path) as pdf:
    for pg in range(pdf.page_count):
        page = pdf[pg]
        pm = page.get_pixmap(matrix=mat, alpha=False)
        # if width or height > 2000 pixels, don't enlarge the image
        if pm.width > 2000 or pm.height > 2000:
            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)

        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        # NOTE(review): the page is converted to BGR here, but the drawn
        # result is saved through PIL below, which treats arrays as RGB -
        # confirm the colours in the saved files are as intended.
        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        imgs.append(img)

# Draw the result: pair each OCR entry with the rendering of the same page.
# Each line has the shape [box, (text, score)].
for idx, (res, image) in enumerate(zip(result or [], imgs)):
    res = res or []
    boxes = [box for box, _ in res]
    txts = [text for _, (text, _) in res]
    scores = [score for _, (_, score) in res]
    # `font_path` comes from the font lookup cell and must be defined beforehand.
    im_show = draw_ocr(image, boxes, txts, scores, font_path=font_path)
    im_show = Image.fromarray(im_show)
    im_show.save(f"result_page_{idx}.jpg")

[2023/11/22 22:39:28] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=1, det_algorithm='DB', det_model_dir='/Users/velo1/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/Users/velo1/.paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_