In [27]:
import cv2
import numpy as np

from paddleocr import PaddleOCR, draw_ocr

# Initialize the OCR model once at module level; crop_invoice_paddle reuses this instance.
# use_angle_cls=True turns on the text-angle classifier and lang='ch' selects the Chinese models.
ocr = PaddleOCR(use_angle_cls=True, lang='ch')

def crop_invoice_paddle(image_path, output_path, margin=20):
    """Crop an image to the rectangle enclosing all OCR-detected text.

    Runs the module-level ``ocr`` engine on the image, computes the
    axis-aligned rectangle that encloses every corner of every detected text
    box, pads it by ``margin`` pixels (clamped to the image bounds) and writes
    the crop to ``output_path``.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        output_path: Path the cropped image is written to with ``cv2.imwrite``.
        margin: Padding in pixels added on each side of the text region.

    Returns:
        None. Success and failure are both reported through ``print``.
    """
    # Read the image; cv2.imread signals failure by returning None, not by raising.
    img = cv2.imread(image_path)
    if img is None:
        print("无法读取图片，请检查路径")
        return
    height, width = img.shape[:2]

    # Run OCR detection.
    result = ocr.ocr(img, cls=True)

    # With no detected text the per-image entry may be None or empty.
    if not result or result[0] is None or len(result[0]) == 0:
        print("未检测到有效的发票区域，请检查图片内容")
        return

    # line[0] is the box of one text line, given as its corner points.
    # Use every corner: for a tilted box the extreme x/y is not necessarily
    # on the first (top-left) or third (bottom-right) corner.
    corners = [point for line in result[0] for point in line[0]]
    xs = [point[0] for point in corners]
    ys = [point[1] for point in corners]

    # Smallest enclosing rectangle, padded by margin and clamped to the image.
    left = max(int(min(xs)) - margin, 0)
    top = max(int(min(ys)) - margin, 0)
    right = min(int(max(xs)) + margin, width)
    bottom = min(int(max(ys)) + margin, height)

    # Crop the image.
    cropped_img = img[top:bottom, left:right]

    # Save the crop; cv2.imwrite returns False when the file was not written.
    if not cv2.imwrite(output_path, cropped_img):
        print("图片保存失败，请检查输出路径")
        return
    print(f"裁剪后的图片已保存到 {output_path}")


[2024/12/05 17:02:24] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/Users/aliancn/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/Users/aliancn/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_tex

In [30]:
# Crop image.png to its OCR-detected text region and write the result to cropped_image.png.
crop_invoice_paddle("image.png", "cropped_image.png")

[2024/12/05 17:03:58] ppocr DEBUG: dt_boxes num : 43, elapsed : 0.2832331657409668
[2024/12/05 17:03:58] ppocr DEBUG: cls num  : 43, elapsed : 0.15288901329040527
[2024/12/05 17:04:04] ppocr DEBUG: rec_res num  : 43, elapsed : 5.213464975357056
裁剪后的图片已保存到 cropped_image.png


In [22]:
import cv2
import numpy as np
def crop_invoice_robust(image_path, output_path, margin=5):
    """Crop an image to the bounding rectangle of its thresholded content.

    The image is converted to grayscale, median-blurred and binarised with an
    inverted adaptive threshold; the bounding rectangle of all non-zero pixels
    is padded by ``margin`` pixels on each side (clamped to the image bounds)
    and the resulting crop is written to ``output_path``.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        output_path: Path the cropped image is written to with ``cv2.imwrite``.
        margin: Padding in pixels added on each side of the content region.

    Returns:
        None. Success and failure are both reported through ``print``.
    """
    # Read the image; cv2.imread returns None when the file cannot be loaded.
    image = cv2.imread(image_path)
    if image is None:
        print("无法读取图片，请检查路径")
        return

    # Convert to grayscale.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Median filter to remove small noise specks.
    smoothed = cv2.medianBlur(gray, 9)

    # Inverted adaptive threshold: separates content from the background,
    # leaving content pixels non-zero.
    binary = cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 10
    )

    # Locate the non-zero pixels (the invoice content area).
    coords = cv2.findNonZero(binary)
    if coords is None:
        print("未检测到有效的发票区域，请检查图片内容")
        return

    # Smallest upright rectangle containing every non-zero pixel.
    x, y, w, h = cv2.boundingRect(coords)

    # Pad each edge independently so that clamping one side at the image
    # border does not shift the unused padding onto the opposite side.
    img_height, img_width = image.shape[:2]
    left = max(x - margin, 0)
    top = max(y - margin, 0)
    right = min(x + w + margin, img_width)
    bottom = min(y + h + margin, img_height)

    # Crop the image.
    cropped_image = image[top:bottom, left:right]

    # Save the crop; cv2.imwrite returns False when the file was not written.
    if not cv2.imwrite(output_path, cropped_image):
        print("图片保存失败，请检查输出路径")
        return
    print(f"裁剪后的图片已保存至 {output_path}")

In [23]:
# Crop image994.jpg with the threshold-based method and write the result to cropped_image994.jpg.
crop_invoice_robust("image994.jpg", "cropped_image994.jpg")

裁剪后的图片已保存至 cropped_image994.jpg
