In [14]:
# Sanity check: verify that the PaddlePaddle installation is usable before
# running any of the OCR cells below.
import paddle
paddle.utils.run_check()



Running verify PaddlePaddle program ... 




PaddlePaddle works well on 1 CPU.
PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.


In [None]:
import cv2
import numpy as np
from pathlib import Path

from paddleocr import PaddleOCR 
from PIL import Image, ImageDraw, ImageFont 

from vision_core.loader.pdf_loader import PDFLoader
from vision_core.preprocessor.image_preprocessor import ImagePreprocessor
from vision_core.detector.table_detector import TableDetector
from vision_core.entities.table import Table
from vision_core.entities.bbox import BBox
from vision_core.entities.cell import Cell

# --- Configuration ---------------------------------------------------------
# Margin (in pixels) added around every detected table before it is cropped
# and passed to OCR.
PADDING = 5
source = Path("../examples/test")
output = Path("../examples/output/ocr_test")

# --- Pick the input document -----------------------------------------------
files = sorted(source.glob("*.pdf"))
if not files:
    # Fail with an actionable message instead of a bare IndexError on files[0].
    raise FileNotFoundError(f"No PDF files found in {source.resolve()}")

# Only the first PDF (in sorted order) is processed.
file = files[0]
print(f"Processing file: {file.name}")

pdf_bytes = file.read_bytes()

loader = PDFLoader(pdf_bytes=pdf_bytes)

# --- OCR engine ------------------------------------------------------------
# Detection and recognition models are loaded from the local ../models
# directory; the optional orientation / unwarping stages are switched off and
# inference runs on CPU.
ocr = PaddleOCR(
    text_recognition_model_name="cyrillic_PP-OCRv5_mobile_rec",
    text_recognition_model_dir="../models/cyrillic_PP-OCRv5_mobile_rec",
    text_detection_model_name="PP-OCRv5_server_det",
    text_detection_model_dir="../models/PP-OCRv5_server_det",
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=False,
    device='cpu',
)

# --- Page image, preprocessing and table detection -------------------------
img = loader.get_page_image(page_num=0, dpi=200)
detector_tbl = TableDetector()
preprocessor = ImagePreprocessor()

cleaned = preprocessor.process(img)
# The preprocessed image is expanded from grayscale to 3-channel RGB for OCR.
cleaned_ocr = cv2.cvtColor(cleaned, cv2.COLOR_GRAY2RGB)
# Visual check of the preprocessing result (opens the system image viewer).
Image.fromarray(cleaned_ocr).show()
# Table detection works on the preprocessed image itself, not the RGB copy.
tables: list[Table] = detector_tbl.detect_tables(cleaned)

# Recognition scores at or above this value are counted as "good" in the
# per-table quality report below.
MIN_GOOD_SCORE = 0.7

for table in tables:
    # Crop the table, enlarged by PADDING on every side, from the RGB page.
    roi_table_img = table.bbox.padding(PADDING).roi(cleaned_ocr)

    # OCR boxes are relative to the padded crop; these offsets translate them
    # back into page coordinates.
    # NOTE(review): assumes padding() does not clamp at the page border - confirm.
    offset_x = table.bbox.x_min - PADDING
    offset_y = table.bbox.y_min - PADDING

    cells: list[Cell] = table.cells
    # Give every cell a string value up front, so cells stay usable even when
    # OCR returns nothing for this table.
    for cell in cells:
        if cell.value is None:
            cell.value = ""

    result = ocr.predict(roi_table_img)

    for res in result:
        bboxes = list(res['rec_boxes'])
        texts = list(res['rec_texts'])
        scores = list(res['rec_scores'])

        if not scores:
            # Nothing recognized: the statistics below would divide by zero
            # (and np.min would raise on an empty input).
            print("No text recognized in this table region")
            continue

        median_score = np.median(scores)
        print(f"Median OCR score: {median_score}")
        good_ratio = sum(1 for s in scores if s >= MIN_GOOD_SCORE) / len(scores)
        print(f"Good OCR ratio (score >= {MIN_GOOD_SCORE}): {good_ratio:.2%}")
        min_score = np.min(scores)
        print(f"Min OCR score: {min_score}")
        # Example output noted by the author:
        #   Median OCR score: 0.8512758016586304
        #   Good OCR ratio (score >= 0.7): 80.77%
        #   Min OCR score: 0.4949820041656494

        # Translate each OCR box to page coordinates once; the result does not
        # depend on the cell, so it is not rebuilt inside the cell loop.
        page_boxes = [
            BBox(
                x_min=bb[0] + offset_x,
                y_min=bb[1] + offset_y,
                x_max=bb[2] + offset_x,
                y_max=bb[3] + offset_y,
            )
            for bb in bboxes
        ]

        # Assign every text fragment to the cell(s) containing the centre of
        # its box. Low-confidence fragments are kept: no score filter is applied.
        for cell in cells:
            for abs_bb, text, _score in zip(page_boxes, texts, scores):
                if cell.bbox.contains_center(abs_bb):
                    cell.value += text + "\n"
                    cell.blobs.append(abs_bb)

# Render the results: the source page with recognized text fragments
# highlighted (left) next to a blank canvas showing the detected cells and
# their recognized text (right).
src = Image.fromarray(img)
overlay = src.convert("RGBA")
dst = Image.new("RGB", src.size, (255, 255, 255))

src_draw = ImageDraw.Draw(overlay, "RGBA")
dst_draw = ImageDraw.Draw(dst)

# "arial.ttf" only resolves on machines where Arial is installed; fall back to
# DejaVu Sans so the cell does not die with OSError elsewhere.
try:
    fnt = ImageFont.truetype("arial.ttf", 22)
except OSError:
    fnt = ImageFont.truetype("DejaVuSans.ttf", 22)


for table in tables:

    for cell in table.cells:
        x1, y1, x2, y2 = cell.bbox.to_tuple()

        # Right-hand side: cell outline plus its recognized text.
        dst_draw.rectangle(((x1, y1), (x2, y2)), outline="red", width=1)
        # A cell that never received OCR text may still hold None.
        cell_text = (cell.value or "").strip()
        dst_draw.text((x1 + 2, y1 + 2), cell_text, fill="black", font=fnt)

        # Left-hand side: half-transparent green box (alpha 128) over every
        # text fragment assigned to this cell.
        for word_bbox in cell.blobs:
            wb_x1, wb_y1, wb_x2, wb_y2 = word_bbox.to_tuple()
            src_draw.rectangle(((wb_x1, wb_y1), (wb_x2, wb_y2)), fill=(144, 238, 144, 128))

# Composite the highlight overlay over the original page, then place both
# views side by side on one canvas.
src_result = Image.alpha_composite(src.convert("RGBA"), overlay).convert("RGB")
combined_image = Image.new("RGB", (src.width * 2, src.height))
combined_image.paste(src_result, (0, 0))
combined_image.paste(dst, (src.width, 0))
combined_image.show()

[32mCreating model: ('PP-OCRv5_server_det', '../models/PP-OCRv5_server_det')[0m


Processing file: 126164.pdf


[32mCreating model: ('cyrillic_PP-OCRv5_mobile_rec', '../models/cyrillic_PP-OCRv5_mobile_rec')[0m
[32m2025-12-22 20:24:37.468[0m | [1mINFO    [0m | [36mvision_core.preprocessor.image_enhancer[0m:[36menhance[0m:[36m24[0m - [1mПроцесс адаптивной обработки изображения начат[0m
[32m2025-12-22 20:24:37.473[0m | [34m[1mDEBUG   [0m | [36mvision_core.preprocessor.image_enhancer[0m:[36menhance[0m:[36m40[0m - [34m[1mУдаление шума с изображения 0.38[0m
[32m2025-12-22 20:24:37.476[0m | [34m[1mDEBUG   [0m | [36mvision_core.preprocessor.image_enhancer[0m:[36menhance[0m:[36m57[0m - [34m[1mУлучшение контраста изображения 0.22[0m
[32m2025-12-22 20:24:37.533[0m | [1mINFO    [0m | [36mvision_core.preprocessor.image_enhancer[0m:[36menhance[0m:[36m62[0m - [1mПроцесс адаптивной обработки изображения завершен[0m
[32m2025-12-22 20:24:41.491[0m | [34m[1mDEBUG   [0m | [36mvision_core.detector.table_detector[0m:[36mdetect_tables[0m:[36m63[0m - [34

Median OCR score: 0.9707940816879272
Good OCR ratio (score >= 0.7): 97.10%
Min OCR score: 0.5270758867263794


In [1]:
from vision_core.loader.pdf_loader import PDFLoader
from pathlib import Path
from paddleocr import PPStructureV3 


# Run the PP-StructureV3 pipeline on the first page of one sample PDF and
# save the result as an image and as Markdown.
source = Path("../examples/test/126164.pdf")
pdf_bytes = source.read_bytes()
loader = PDFLoader(pdf_bytes=pdf_bytes)
img = loader.get_page_image(page_num=0)

# Optional stages are switched off; Russian language, CPU inference.
pipeline_options = {
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
    "use_formula_recognition": False,
    "use_chart_recognition": False,
    "lang": "ru",
    "device": 'cpu',
}
pipeline = PPStructureV3(**pipeline_options)

result = pipeline.predict(img)

# Dump each result to stdout and write it next to the notebook.
for res in result:
    res.print()
    res.save_to_img("ppstructure_result_page_.png")
    res.save_to_markdown("ppstructure_result_page_.md")

  from .autonotebook import tqdm as notebook_tqdm
[33mChecking connectivity to the model hosters, this may take a while. To bypass this check, set `DISABLE_MODEL_SOURCE_CHECK` to `True`.[0m
[32mCreating model: ('PP-DocBlockLayout', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\Users\dev\.paddlex\official_models\PP-DocBlockLayout`.[0m
[32mCreating model: ('PP-DocLayout_plus-L', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\Users\dev\.paddlex\official_models\PP-DocLayout_plus-L`.[0m
[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\Users\dev\.paddlex\official_models\PP-LCNet_x1_0_textline_ori`.[0m
[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mModel files already exist. Using cached files. To redownload, p

In [24]:
from vision_core.loader.pdf_loader import PDFLoader
from pathlib import Path
from paddleocr import TableCellsDetection 


# Run the default TableCellsDetection model on the first page of one sample
# PDF and save the visualised detections.
source = Path("../examples/test/126164.pdf")
pdf_bytes = source.read_bytes()
loader = PDFLoader(pdf_bytes=pdf_bytes)
img = loader.get_page_image(page_num=0)

model = TableCellsDetection()

# threshold=0.8 is forwarded to predict(); presumably the minimum detection
# score to keep - confirm against the PaddleOCR documentation.
predict_options = {"threshold": 0.8, "batch_size": 1}
result = model.predict(img, **predict_options)

# Dump each result to stdout and write the annotated image next to the notebook.
for res in result:
    res.print()
    res.save_to_img("cell_detection_result_page_.png")


[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\Users\dev\.paddlex\official_models\RT-DETR-L_wired_table_cell_det`.[0m
[32m{'res': {'input_path': None, 'page_index': None, 'boxes': [{'cls_id': 0, 'label': 'cell', 'score': 0.9530835151672363, 'coordinate': [np.float32(153.22197), np.float32(743.7157), np.float32(1263.736), np.float32(824.79193)]}, {'cls_id': 0, 'label': 'cell', 'score': 0.9492114186286926, 'coordinate': [np.float32(1997.1945), np.float32(742.60547), np.float32(2354.7637), np.float32(823.8765)]}, {'cls_id': 0, 'label': 'cell', 'score': 0.9485578536987305, 'coordinate': [np.float32(154.35794), np.float32(553.45013), np.float32(1264.8751), np.float32(649.67096)]}, {'cls_id': 0, 'label': 'cell', 'score': 0.9477660655975342, 'coordinate': [np.float32(1624.1086), np.float32(744.17645), np.float32(1997.7921), np.float32(824.571)]}, {'cls_id': 0, 'label': 'cell', 'score': 0.9469155073165894, 'coordinate': [np.float3

In [None]:
from vision_core.loader.pdf_loader import PDFLoader
from pathlib import Path
from paddleocr import LayoutDetection

# Run the default LayoutDetection model on the first page of one sample PDF
# and save the visualised layout regions.
source = Path("../examples/test/good3.pdf")
pdf_bytes = source.read_bytes()
loader = PDFLoader(pdf_bytes=pdf_bytes)
img = loader.get_page_image(page_num=0)

model = LayoutDetection()

predict_options = {"batch_size": 1, "layout_nms": True}
output = model.predict(img, **predict_options)

# Dump each result to stdout and write the annotated image next to the notebook.
for res in output:
    res.print()
    res.save_to_img('layout_detection_result_page_.png')

[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\Users\dev\.paddlex\official_models\PP-DocLayout_plus-L`.[0m
[32m{'res': {'input_path': None, 'page_index': None, 'boxes': [{'cls_id': 8, 'label': 'table', 'score': 0.9911592602729797, 'coordinate': [np.float32(84.35543), np.float32(454.96985), np.float32(2422.4897), np.float32(3414.241)]}, {'cls_id': 2, 'label': 'text', 'score': 0.5877549052238464, 'coordinate': [np.float32(92.24892), np.float32(327.72696), np.float32(2405.147), np.float32(403.93857)]}]}}[0m


In [31]:
from vision_core.loader.pdf_loader import PDFLoader
from pathlib import Path
from paddleocr import PaddleOCRVL

# Run the PaddleOCR-VL pipeline on the first page of a degraded sample scan
# and save the visualised result.
source = Path("../examples/test/bad2-зерно-сильный разрыв контуров букв.pdf")
pdf_bytes = source.read_bytes()
loader = PDFLoader(pdf_bytes=pdf_bytes)
img = loader.get_page_image(page_num=0)

# Orientation classification and unwarping are switched off.
pipeline_options = {
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
}
pipeline = PaddleOCRVL(**pipeline_options)

output = pipeline.predict(img)

# Dump each result to stdout and write the annotated image next to the notebook.
for res in output:
    res.print()
    res.save_to_img('vlm_result_page_.png')

[32mCreating model: ('PP-DocLayoutV2', None)[0m
[32mUsing official model (PP-DocLayoutV2), the model files will be automatically downloaded and saved in `C:\Users\dev\.paddlex\official_models\PP-DocLayoutV2`.[0m
[32mCreating model: ('PaddleOCR-VL-0.9B', None)[0m
[32mUsing official model (PaddleOCR-VL), the model files will be automatically downloaded and saved in `C:\Users\dev\.paddlex\official_models\PaddleOCR-VL`.[0m
[32mLoading configuration file C:\Users\dev\.paddlex\official_models\PaddleOCR-VL\config.json[0m
[32mLoading weights file C:\Users\dev\.paddlex\official_models\PaddleOCR-VL\model.safetensors[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16- num_key_value_heads: 2[0m
[32muse GQA - num_heads: 16-