In [1]:
import cv2
import numpy as np

In [2]:
def cv_show(img):
    """Display ``img`` in a window titled 'image' until a key is pressed.

    Blocks on ``cv2.waitKey(0)`` and then destroys all OpenCV windows.
    """
    window_title = 'image'
    cv2.imshow(window_title, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [3]:
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: array whose ``shape[:2]`` is ``(height, width)``.
        width: target width in pixels; when given it takes precedence and
            ``height`` is ignored.
        height: target height in pixels, used only when ``width`` is None.
        inter: interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The input unchanged when both ``width`` and ``height`` are None,
        otherwise the result of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the width ratio and derive the matching height
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Scale by the height ratio and derive the matching width
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    return cv2.resize(image, target_size, interpolation=inter)

In [4]:
def order_points(pts):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left.

    The top-left point has the smallest x + y and the bottom-right the
    largest; the top-right point has the smallest y - x and the bottom-left
    the largest.

    Args:
        pts: numpy array of points, one (x, y) pair per row.

    Returns:
        A (4, 2) float32 array holding the points in the order above.
    """
    coord_sum = pts.sum(axis=1)          # x + y per point
    coord_diff = np.diff(pts, axis=1)    # y - x per point

    # Source row index for each output slot: 0=TL, 1=TR, 2=BR, 3=BL
    picks = (
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    )

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, src_idx in enumerate(picks):
        ordered[slot] = pts[src_idx]
    return ordered

In [5]:
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    The corners are first ordered with ``order_points`` (top-left, top-right,
    bottom-right, bottom-left). The output width is the longer of the bottom
    and top edges and the output height the longer of the right and left
    edges, each truncated to an int.

    Args:
        image: source image passed to ``cv2.warpPerspective``.
        pts: array of four (x, y) corner points.

    Returns:
        The perspective-warped image of size (width, height) as computed above.
    """
    def _edge_length(p, q):
        # Euclidean distance between two 2-D points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    src_corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = src_corners

    # Output size: longest of each pair of opposite edges
    out_w = max(int(_edge_length(bottom_right, bottom_left)),
                int(_edge_length(top_right, top_left)))
    out_h = max(int(_edge_length(top_right, bottom_right)),
                int(_edge_length(top_left, bottom_left)))

    # Destination corners, in the same TL, TR, BR, BL order as the source
    dst_corners = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Perspective matrix mapping source corners onto the rectangle
    matrix = cv2.getPerspectiveTransform(src_corners, dst_corners)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

## 图像预处理

In [6]:
# Load the input image and record the scale factor between the original and
# the 500px-high working copy, so coordinates found on the working copy can
# be mapped back to the original resolution later.
image_path = "data/scan/page.jpg"
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))

ratio = image.shape[0] / 500.0
orig = image.copy()
image = resize(orig, height=500)

In [7]:
# Preprocessing for edge detection: grayscale -> 5x5 Gaussian blur -> Canny
gray = cv2.GaussianBlur(
    cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
    (5, 5),
    0,
)
edged = cv2.Canny(gray, 75, 200)

# Show the blurred grayscale image, then the edge map
cv_show(gray)
cv_show(edged)

## 轮廓检测

In [8]:
# Contour detection.
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x. The contour list is always the
# second-to-last element, so [-2] works on every version, whereas [1]
# selects the hierarchy on 4.x.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Stays None if no candidate qualifies, so the failure is reported here
# rather than as a NameError in a later cell.
screenCnt = None

# Walk the candidates from largest to smallest
for c in cnts:
    # Perimeter of the closed contour
    peri = cv2.arcLength(c, True)
    # Polygonal approximation: epsilon (2% of the perimeter) is the maximum
    # distance from the original contour; True means the curve is closed
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    # Take the first approximation that has exactly four vertices
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError("No 4-point contour found among the largest contours")

In [9]:
# Show the detected outline.
# drawContours paints into the array it is given, so draw on a copy: this
# keeps `image` clean and re-running the edge-detection cell will not pick
# up the green outline.
outlined = image.copy()
cv2.drawContours(outlined, [screenCnt], -1, (0, 255, 0), 2)
cv_show(outlined)

## 透视变换并处理

In [10]:
# The contour was found on the resized image; multiply by `ratio` to map it
# back onto the original-resolution image before warping.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

# Grayscale, then fixed-threshold binarization (threshold 100, max value 255)
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]

# cv2.imwrite reports failure through its boolean return value; check it so
# a failed write is not silently ignored.
scan_path = 'data/scan/scan.jpg'
if not cv2.imwrite(scan_path, ref):
    raise IOError("Could not write image: {}".format(scan_path))
cv_show(ref)

## OCR识别
* 从 https://digi.bib.uni-mannheim.de/tesseract/ 下载并安装 Tesseract
* 配置环境变量（将 Tesseract 安装目录加入 PATH）
* 修改 pytesseract.py 中的 tesseract_cmd

In [11]:
import os
import cv2
import pytesseract

from PIL import Image

In [12]:
# Read the saved scan and convert it to grayscale.
scan_path = 'data/scan/scan.jpg'
image = cv2.imread(scan_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(scan_path))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [13]:
# Preprocessing mode applied before OCR:
#   "thresh" -> Otsu binarization, "blur" -> 3x3 median blur
preprocess = 'blur'

# Intermediate file name, derived from the current process id
filename = "{}.png".format(os.getpid())

if preprocess == "blur":
    gray = cv2.medianBlur(gray, 3)
elif preprocess == "thresh":
    gray = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# Save the preprocessed image
cv2.imwrite(filename, gray)

True

In [14]:
# Run OCR on the intermediate file. The image handle is closed by the `with`
# block before the file is deleted, and the file is removed even if OCR fails.
try:
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    os.remove(filename)

print(text)

pa Yeu bs SENG WANE SP VN bP web Pasi be

On Line rp we manipulate the top deft pach in Vive an
age, Which is located at coordinate oor and set it ter tae:
avalueot (9, 8, 256) Hee were reading thes paver vatie
In RGB format, we would have a value ob fs for red, o for
green. and 256 tor blue. thus making, ita pure Dhue cobor

However, as | mentioned above, We need fo take special
care when working with Open@¥ Our prvels are actual.
stored in BGR format, net RGB format

We actually read this pixel as 255 for red, G for green, and
O tor blue, making it a red color, uefa blue color

Atter setting the top lett pixel to have a red color on Line
1g, we then grab the pixel value and print it back to con:
sole on Lines 15 and 16, just to demonstrate that we have
indeed successfully changed the color of the pixel

Accessing and setting a single pixel value is simple enough,
but what if we wanted to use NumP’s array shomy capa
bilities to access larger rectangular portions of the image?
Phe code