基本步骤
- step1:边缘检测
- step2:获取轮廓
- step3:变换
- step4:OCR

In [1]:
# 包引入
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pytesseract
import os

In [2]:
# 定义一些函数
# 图像显示函数
def CV_show(name, img):
    """Show ``img`` in an OpenCV window titled ``name`` until a key is pressed.

    Blocks on ``cv2.waitKey(0)`` and afterwards closes every OpenCV window.

    Args:
        name: Window title passed to ``cv2.imshow``.
        img: Image passed to ``cv2.imshow``.
    """
    try:
        cv2.imshow(name, img)
        cv2.waitKey(0)
    finally:
        # Close the windows even if displaying or the key wait is interrupted
        # (e.g. KeyboardInterrupt), so no stale window is left behind.
        cv2.destroyAllWindows()

# 重构图像的尺寸
def resize(img, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``img`` to a target width or height, keeping the aspect ratio.

    Args:
        img: Image array; the first two entries of ``img.shape`` are read as
            (height, width).
        width: Target width in pixels. Only used when ``height`` is None.
        height: Target height in pixels. Takes precedence over ``width``
            when both are given.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``img`` itself (not a copy) when neither ``width`` nor ``height`` is
        given; otherwise the image returned by ``cv2.resize``.
    """
    (h, w) = img.shape[:2]

    # Identity comparison is the correct way to test for None.
    if width is None and height is None:
        return img

    if height is None:
        # Scale by the requested width.
        r = width / float(w)
        dim = (width, int(h * r))
    else:
        # Scale by the requested height (also the case when both are given).
        r = height / float(h)
        dim = (int(w * r), height)

    return cv2.resize(img, dim, interpolation=inter)

# 对一个矩形的四组坐标点排序,0123对应为从左上角顺时针依次数的四个角
def order_point(pts):
    """Order four corner points clockwise, starting from the top-left.

    Args:
        pts: Array whose rows are (x, y) points; it is reduced along axis 1.

    Returns:
        A (4, 2) float32 array laid out as
        [top-left, top-right, bottom-right, bottom-left].
    """
    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    # y - x (np.diff along axis 1) is smallest at the top-right corner and
    # largest at the bottom-left.
    coord_delta = np.diff(pts, axis=1)

    # Source row for each output slot, in clockwise order.
    source_rows = (
        np.argmin(coord_sum),
        np.argmin(coord_delta),
        np.argmax(coord_sum),
        np.argmax(coord_delta),
    )

    rect = np.zeros((4, 2), dtype="float32")
    for slot, row in enumerate(source_rows):
        rect[slot] = pts[row]
    return rect

# 对二维图像进行透视变换
def four_point_transform(img, dst):
    """Warp the quadrilateral ``dst`` of ``img`` onto an axis-aligned rectangle.

    Args:
        img: Source image handed to ``cv2.warpPerspective``.
        dst: The four corner points of the region, in any order; they are
            sorted with ``order_point`` before use.

    Returns:
        The warped image of size (width, height), where each dimension is the
        longer of the two opposite edges of the quadrilateral, truncated to
        an integer.
    """
    def _edge_len(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    rect = order_point(dst)
    tl, tr, br, bl = rect

    # Sizes are truncated to int because they are used as the output image
    # size and as pixel coordinates of the target corners.
    width = max(int(_edge_len(br, bl)), int(_edge_len(tr, tl)))
    height = max(int(_edge_len(br, tr)), int(_edge_len(bl, tl)))

    # Target corners, in the same clockwise order as ``rect``.
    right = width - 1
    bottom = height - 1
    target = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    # Perspective matrix mapping the ordered source corners onto the target.
    M = cv2.getPerspectiveTransform(rect, target)
    return cv2.warpPerspective(img, M, (width, height))

In [3]:
# Load the photographed document. cv2.imread returns None instead of raising
# when the file is missing or unreadable, so fail early with a clear message.
img = cv2.imread("Document1.jpg")
if img is None:
    raise FileNotFoundError("could not read image 'Document1.jpg'")
origin = img.copy()
# Scale factor between the original image height and the 1000-pixel working
# height used for detection below.
ratio = img.shape[0] / 1000.0

resized = resize(origin, height=1000)

# Preprocessing
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY) # convert to grayscale
gray = cv2.GaussianBlur(gray, (5, 5), 0) # Gaussian blur to suppress noise
edged = cv2.Canny(gray, 100, 200) # Canny edge detection for a rough edge map

# Show the preprocessed image
CV_show("预处理图像", edged)

In [4]:
# Detect contours on the edge map and display them.
# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in OpenCV 2.x/4.x; index -2 is the contour list in
# every version, whereas index 1 is the hierarchy on 2.x/4.x.
cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
img_cnts = cv2.drawContours(resized.copy(), cnts, -1, (0,0,255))
CV_show("轮廓检测", img_cnts)

In [5]:
# Pick the target contour out of the detected contours.

# Sort the contours by area, largest first (reverse=True).
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

# Stays None when no contour qualifies, so the failure is reported explicitly
# instead of surfacing as a NameError further down.
screenCnt = None
for c in cnts:
    # Approximate the contour with a polygon, tolerance 2% of its perimeter.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4: # the approximation has four vertices: take it as the page outline
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError("no contour with four vertices was found in the image")

img_cnts = cv2.drawContours(resized.copy(), [screenCnt], -1, (0,0,255))
CV_show("轮廓检测", img_cnts)

问题
在文本较多的纸张或者文字与纸张边缘较为贴近时,轮廓检测会出现一些问题,即无法检测到最外围的轮廓,可能是检测方法的问题

In [6]:
# Warp the page to a top-down view. screenCnt is multiplied by ratio before
# being applied to the full-resolution original image.
warped_img = four_point_transform(origin, ratio * screenCnt.reshape(4, 2))

# Binarize: grayscale first, then a fixed threshold at 100.
warped_img_gray = cv2.cvtColor(warped_img, cv2.COLOR_BGR2GRAY)
warped_img_thresh = cv2.threshold(
    warped_img_gray, 100, 255, cv2.THRESH_BINARY
)[1]

# Preview a height-1000 copy so the full-size result stays untouched.
copied = warped_img_thresh.copy()
resized = resize(copied, height=1000)
CV_show("变换后图像", resized)

# Save the full-size binarized scan.
cv2.imwrite("scan.jpg", warped_img_thresh)

True

In [7]:
# Run OCR on the saved scan to extract its text.
# cv2.imread returns None instead of raising when the file cannot be read.
img = cv2.imread("scan.jpg")
if img is None:
    raise FileNotFoundError("could not read image 'scan.jpg'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Temporary PNG named after the current process id; it is handed to PIL.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)

try:
    # The context manager closes the image file handle once OCR is done, so
    # the file can be deleted afterwards.
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    # Remove the temporary file even when OCR fails.
    if os.path.exists(filename):
        os.remove(filename)

print(text)

dust be what ye wont to bo

Don't take others —vansidlor ation.


