<a href="https://colab.research.google.com/github/Maxence-29/Video-Excel-OCR/blob/master/code/notebook01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [1]:
!pip install paddleocr paddlepaddle opencv-python pandas
import cv2
import os
from paddleocr import PaddleOCR
import pandas as pd

Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/fonts/simfang.ttf ...
Downloading simfang.ttf ...


Read video and capture frame

In [2]:
video_path = "/content/assets/成绩单.mp4"
output_dir = "/content/frames"
interval = 5

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

cap = cv2.VideoCapture(video_path)
frame_count = 0
saved_count = 0

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break

    if frame_count % interval == 0:
        filename = os.path.join(output_dir, f"frame_{saved_count:03d}.jpg")
        cv2.imwrite(filename, frame)
        print(f"Saved {filename}")
        saved_count += 1

    frame_count += 1

cap.release()
print("Done.")

Done.


Frame enhancement

In [3]:
frame_path = sorted([os.path.join('/content/frames', f) for f in os.listdir('/content/frames') if f.endswith(".jpg")])

if not os.path.exists('/content/enhanced'):
    os.makedirs('/content/enhanced')

idx = 0
for f_path in frame_path:
    img = cv2.imread(f_path)

    # 灰度化
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 提高对比度（CLAHE）
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    enhanced = clahe.apply(gray)

    # 锐化
    blurred = cv2.GaussianBlur(enhanced, (7, 7), 10.0)
    sharpened = cv2.addWeighted(enhanced, 2, blurred, -0.5, 0)

    filename = os.path.join('/content/enhanced', f"enhanced_frame_{idx:03d}.jpg")
    cv2.imwrite(filename, sharpened)
    cv2.destroyAllWindows()

    print(f"Processed {filename}")
    idx += 1

print("Done.")

Done.


> Here we introduce manual image selection and reshape for the following OCR step.

OCR

Model: PaddleOCR

In [4]:
ocr = PaddleOCR(use_textline_orientation=True, lang='ch')

def ocr_to_dataframe(img_path, ocr_model, y_thresh=10):
    # 使用已初始化的模型获取 OCR 结果
    result = ocr_model.ocr(img_path)

    if not result or result[0] is None:
        print(f"[!] No OCR result for {img_path}")
        return None

    boxes = result[0]
    data = []
    for box in boxes:
        # 添加对 box[0] 结构是否符合预期的检查
        if len(box[0]) != 4:
            print(f"[!] Skipping box with unexpected structure in {img_path}: {box}")
            continue # 跳过到下一个 box

        (x0, y0), (x1, y1), (x2, y2), (x3, y3) = box[0]
        text = box[1][0]
        conf = box[1][1]
        x_center = (x0 + x2) / 2
        y_center = (y0 + y2) / 2
        data.append((text, conf, x_center, y_center))

    # 函数的其余部分保持不变
    data = sorted(data, key=lambda x: x[3])

    rows = []
    current_row = []
    for i, item in enumerate(data):
        if i == 0:
            current_row.append(item)
            continue
        _, _, _, y = item
        _, _, _, prev_y = data[i - 1]

        if abs(y - prev_y) > y_thresh:
            rows.append(current_row)
            current_row = [item]
        else:
            current_row.append(item)
    if current_row:
        rows.append(current_row)

    final_table = []
    for row in rows:
        sorted_row = sorted(row, key=lambda x: x[2])
        texts = [cell[0] for cell in sorted_row]
        final_table.append(texts)

    return pd.DataFrame(final_table)

[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-LCNet_x1_0_doc_ori_infer.tar ...
Downloading PP-LCNet_x1_0_doc_ori_infer.tar ...
Extracting PP-LCNet_x1_0_doc_ori_infer.tar


[32mCreating model: ('UVDoc', None)[0m
[32mUsing official model (UVDoc), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/UVDoc_infer.tar ...
Downloading UVDoc_infer.tar ...
Extracting UVDoc_infer.tar


[32mCreating model: ('PP-LCNet_x0_25_textline_ori', None)[0m
[32mUsing official model (PP-LCNet_x0_25_textline_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-LCNet_x0_25_textline_ori_infer.tar ...
Downloading PP-LCNet_x0_25_textline_ori_infer.tar ...
Extracting PP-LCNet_x0_25_textline_ori_infer.tar


[32mCreating model: ('PP-OCRv5_mobile_det', None)[0m
[32mUsing official model (PP-OCRv5_mobile_det), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_det_infer.tar ...
Downloading PP-OCRv5_mobile_det_infer.tar ...
Extracting PP-OCRv5_mobile_det_infer.tar


[32mCreating model: ('PP-OCRv5_mobile_rec', None)[0m
[32mUsing official model (PP-OCRv5_mobile_rec), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Connecting to https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_mobile_rec_infer.tar ...
Downloading PP-OCRv5_mobile_rec_infer.tar ...
Extracting PP-OCRv5_mobile_rec_infer.tar


Implement

In [5]:
img_dir = "/content/imgs"
img_paths = sorted([os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")])

dfs = []
for idx, img_path in enumerate(img_paths):
    # 将已初始化的 ocr_model 传递给函数
    df = ocr_to_dataframe(img_path, ocr_model=ocr)
    if df is not None:
        dfs.append(df)
        print("df shape: ", df.shape, "\n")
        print(f"[✓] OCR Done: {img_path}")
    else:
        print(f"[!] Skipped: {img_path}")

print("Done.")

  result = ocr_model.ocr(img_path)
[33mResized image size (24533x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本.jpg: te

  result = ocr_model.ocr(img_path)
[33mResized image size (736x20732) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本1.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (12369x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本10.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (8921x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本11.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本2.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (16175x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本3.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本4.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (16000x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本5.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (736x16175) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本6.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (12065x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本7.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (10222x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本8.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (12369x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/01_副本9.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/01

  result = ocr_model.ocr(img_path)
[33mResized image size (12266x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本.jpg: te

  result = ocr_model.ocr(img_path)
[33mResized image size (12065x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本1.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (735x17116) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本10.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (17523x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本11.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本12.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (736x16919) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本13.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (8608x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本2.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (11967x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本3.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (12369x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本4.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (12369x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本5.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本6.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (736x16919) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本7.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (17317x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本8.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (735x17116) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/02_副本9.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/02

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/03_副本.jpg: te

  result = ocr_model.ocr(img_path)
[33mResized image size (17523x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/03_副本1.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/03

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/03_副本2.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/03

  result = ocr_model.ocr(img_path)
[33mResized image size (16539x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本.jpg: te

  result = ocr_model.ocr(img_path)
[33mResized image size (16919x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本1.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (15333x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本10.jpg: rec_polys
[!] Skipping box with unexpected structure in /cont

  result = ocr_model.ocr(img_path)
[33mResized image size (16727x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本2.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (736x17317) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本3.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (16919x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本4.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (17317x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本5.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (17116x735) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本6.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (17951x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本7.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (16919x736) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本8.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

  result = ocr_model.ocr(img_path)
[33mResized image size (17116x735) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: input_path
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: page_index
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: doc_preprocessor_res
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: dt_polys
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: model_settings
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: text_det_params
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: text_type
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: text_rec_score_thresh
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: rec_texts
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: rec_scores
[!] Skipping box with unexpected structure in /content/imgs/04_副本9.jpg: rec_polys
[!] Skipping box with unexpected structure in /content/imgs/04

Concat and Output

In [6]:
if dfs:
    max_rows = max(df.shape[0] for df in dfs)
    dfs = [df.reindex(index=range(max_rows)) for df in dfs]
    merged_df = pd.concat(dfs, axis=1)

    output_dir = "/content/output"
    os.makedirs(output_dir, exist_ok=True)

    merged_df.to_csv(os.path.join(output_dir, "ocr_table.csv"), index=False, header=False)
    print("Done.")
else:
    print("No dataframes were generated. Output file not created.")

Done.
