Imports

In [1]:
import cv2
import os
from paddleocr import PaddleOCR
import pandas as pd



Read the video and capture frames

In [None]:
# Extract every `interval`-th frame of the video into `output_dir` as
# sequentially numbered JPEG files (frame_000.jpg, frame_001.jpg, ...).
video_path = "../assets/成绩单.mp4"
output_dir = "../frames"
interval = 5  # keep one frame out of every `interval` frames read

# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise when the file is missing or cannot be decoded;
# without this check the loop below is skipped and the cell prints "Done."
# although nothing was saved.
if not cap.isOpened():
    cap.release()
    raise OSError(f"Could not open video: {video_path}")

frame_count = 0  # frames read from the video so far
saved_count = 0  # frames written to disk; also used to number the files

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # No more frames could be read: stop extracting.
            break

        if frame_count % interval == 0:
            filename = os.path.join(output_dir, f"frame_{saved_count:03d}.jpg")
            # imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(filename, frame):
                raise OSError(f"Failed to write {filename}")
            print(f"Saved {filename}")
            saved_count += 1

        frame_count += 1
finally:
    # Release the capture handle even if saving a frame fails part-way.
    cap.release()

print("Done.")

Frame enhancement

In [None]:
# Enhance every extracted frame (grayscale -> CLAHE contrast -> sharpening)
# and write the result to `enhanced_dir`.
frames_dir = '../frames'
enhanced_dir = '../enhanced'

frame_path = sorted([os.path.join(frames_dir, f) for f in os.listdir(frames_dir) if f.endswith(".jpg")])

os.makedirs(enhanced_dir, exist_ok=True)

# The CLAHE operator does not depend on the image, so build it once
# instead of once per frame.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))

# `idx` is the position of the frame in the sorted input list, so the output
# numbering stays aligned with the input even when a frame is skipped.
for idx, f_path in enumerate(frame_path):
    img = cv2.imread(f_path)
    if img is None:
        # imread returns None (no exception) for unreadable or corrupt files;
        # cvtColor would otherwise fail on it.
        print(f"[!] Could not read {f_path}, skipped")
        continue

    # Convert to grayscale.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Boost local contrast (CLAHE).
    enhanced = clahe.apply(gray)

    # Sharpen: result = 2 * enhanced - 0.5 * blurred.
    blurred = cv2.GaussianBlur(enhanced, (7, 7), 10.0)
    sharpened = cv2.addWeighted(enhanced, 2, blurred, -0.5, 0)

    filename = os.path.join(enhanced_dir, f"enhanced_frame_{idx:03d}.jpg")
    if not cv2.imwrite(filename, sharpened):
        # imwrite signals failure through its return value, not an exception.
        raise OSError(f"Failed to write {filename}")

    # No window is opened in this cell, so no cv2.destroyAllWindows() call
    # is needed here.
    print(f"Processed {filename}")

print("Done.")

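> Manual step: before running OCR, select the usable frames by hand and reshape them as needed. The OCR cells below read their input from `../imgs`, so the selected `.jpg` files must be placed in that folder.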

OCR

Model: PaddleOCR

In [2]:
# Lazily created PaddleOCR engine shared by all calls (see _get_ocr_engine).
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, constructing it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang='ch')
    return _OCR_ENGINE


def ocr_to_dataframe(img_path, y_thresh=10, ocr=None):
    """Run OCR on one image and arrange the recognised text as a table.

    Detected text boxes are sorted by the vertical centre of their box and
    split into rows: a new row starts whenever the vertical centre of a box
    differs from that of the previous box (in sorted order) by more than
    ``y_thresh``. Within a row, cells are ordered left to right by the
    horizontal centre of their box. Recognition confidence is not used.

    Args:
        img_path: Image passed to the engine's ``ocr`` method.
        y_thresh: Maximum vertical-centre distance, in the coordinate units
            returned by the engine, between consecutive boxes of one row.
        ocr: Optional engine exposing ``ocr(img_path, cls=True)``. When
            omitted, one shared ``PaddleOCR(use_angle_cls=True, lang='ch')``
            instance is created on first use and reused afterwards, so the
            model is not rebuilt for every image.

    Returns:
        A ``pandas.DataFrame`` with one row per detected text row, or
        ``None`` (after printing a notice) if the engine returned no result.
    """
    if ocr is None:
        ocr = _get_ocr_engine()
    result = ocr.ocr(img_path, cls=True)

    if not result or result[0] is None:
        print(f"[!] No OCR result for {img_path}")
        return None

    # Reduce every detection to (text, x_center, y_center). The centre is the
    # midpoint of the first and third corner points of the box.
    cells = []
    for box in result[0]:
        corners = box[0]
        (x0, y0), (x2, y2) = corners[0], corners[2]
        text = box[1][0]
        cells.append((text, (x0 + x2) / 2, (y0 + y2) / 2))

    # Top-to-bottom order; the sort is stable, so ties keep detection order.
    cells.sort(key=lambda cell: cell[2])

    # Each box is compared with the box directly before it, which is always
    # the last member of the row currently being built.
    rows = []
    for cell in cells:
        if rows and abs(cell[2] - rows[-1][-1][2]) <= y_thresh:
            rows[-1].append(cell)
        else:
            rows.append([cell])

    # Left-to-right order inside each row; keep only the text.
    final_table = [
        [cell[0] for cell in sorted(row, key=lambda cell: cell[1])]
        for row in rows
    ]

    return pd.DataFrame(final_table)

Run OCR on the selected images

In [3]:
# Gather the .jpg files in the image folder in a stable, sorted order.
img_dir = "../imgs"
img_paths = sorted(
    os.path.join(img_dir, name)
    for name in os.listdir(img_dir)
    if name.endswith(".jpg")
)

# OCR each image; `dfs` collects one table per image that produced a result.
dfs = []
for idx, img_path in enumerate(img_paths):
    df = ocr_to_dataframe(img_path)
    if df is None:
        print(f"[!] Skipped: {img_path}")
        continue

    dfs.append(df)
    print("df shape: ", df.shape, "\n")
    print(f"[✓] OCR Done: {img_path}")

print("Done.")

[2025/06/03 15:10:19] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/Users/highsun/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/Users/highsun/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch

Concat and Output

In [4]:
# Merge the per-image OCR tables side by side and export them as one CSV.

# With no OCR tables there is nothing to merge; fail with a clear message
# instead of the opaque "max() arg is an empty sequence".
if not dfs:
    raise ValueError("No OCR tables to merge: `dfs` is empty.")

# Pad every table to the same number of rows so they line up row by row
# when placed next to each other.
max_rows = max(df.shape[0] for df in dfs)
dfs = [df.reindex(index=range(max_rows)) for df in dfs]
merged_df = pd.concat(dfs, axis=1)

output_dir = "../output"
os.makedirs(output_dir, exist_ok=True)

# Written without index and header, so the file contains only the cell text.
merged_df.to_csv(os.path.join(output_dir, "ocr_table.csv"), index=False, header=False)
print("Done.")

Done.
