### OpenCV的EAST模型

EAST模型是一个基于深度学习的文本检测模型，用于检测图片中的文本区域。该模型具有高效和准确的特点，特别是在处理自然场景文本时，能够在多种复杂背景下快速定位文本区域。  

In [4]:
import cv2
import numpy as np

# Load the pre-trained EAST text detector from its frozen graph file.
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# Read the input image. cv2.imread does not raise on a missing or undecodable
# file; it returns None, so fail early with a clear message instead of letting
# blobFromImage raise an opaque error further down.
image = cv2.imread('img1.jpg')
if image is None:
    raise FileNotFoundError("Could not read image 'img1.jpg'")

# Size the image is resized to before being fed to the network.
# NOTE(review): the OpenCV EAST sample requires both to be multiples of 32;
# 640 and 480 satisfy that.
inpWidth = 640
inpHeight = 480

# Pre-process: resize (no cropping), subtract the per-channel mean and swap the
# R and B channels, producing an NCHW blob.
blob = cv2.dnn.blobFromImage(
    image, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94),
    swapRB=True, crop=False
)

print(blob.shape)
print(net)

(1, 3, 480, 640)
< cv2.dnn.Net 0x7fda8845bb10>


In [13]:
import cv2
import numpy as np

# Load the pre-trained EAST text detector from its frozen graph file.
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# Read the input image. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail early with a clear message.
image = cv2.imread('img1.jpg')
if image is None:
    raise FileNotFoundError("Could not read image 'img1.jpg'")
orig = image.copy()  # untouched copy used later for drawing
(H, W) = image.shape[:2]

# Network input size: the image is resized to 640x480 by blobFromImage.
newW, newH = 640, 480

# Ratios that map network-input coordinates back onto the original image.
rW = W / newW
rH = H / newH

# Convert to an NCHW blob: mean subtraction, R/B swap, no cropping.
blob = cv2.dnn.blobFromImage(image, 1.0, (newW, newH), (123.68, 116.78, 103.94), True, crop=False)

# Forward pass: fetch the text-confidence map and the box-geometry map.
net.setInput(blob)
scores, geometry = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

# Spatial size of the output maps.
num_rows = scores.shape[2]
num_cols = scores.shape[3]

# Decoded boxes as (startX, startY, endX, endY) in original-image pixels.
boxes = []
confidence_threshold = 0.4  # raise/lower to make the detector stricter/looser

# Decode one candidate box per output cell whose score passes the threshold.
for y in range(num_rows):
    for x in range(num_cols):
        score = scores[0, 0, y, x]
        if score < confidence_threshold:
            continue

        # Each output cell corresponds to a 4x4 patch of the network input.
        offsetX = x * 4.0
        offsetY = y * 4.0

        # Channel 4 is the box rotation angle (radians).
        angle = geometry[0, 4, y, x]
        cosA = np.cos(angle)
        sinA = np.sin(angle)

        # Channels 0..3 are the distances from this cell to the top, right,
        # bottom and left edges of the text box (EAST "RBOX" geometry), so the
        # box height/width are sums of opposite distances -- not channels 0/1
        # on their own.
        d_top = geometry[0, 0, y, x]
        d_right = geometry[0, 1, y, x]
        d_bottom = geometry[0, 2, y, x]
        d_left = geometry[0, 3, y, x]
        h = d_top + d_bottom
        w = d_right + d_left

        # Bottom-right corner from the right/bottom distances (rotated by the
        # predicted angle); the top-left corner follows from the box size.
        endX = offsetX + cosA * d_right + sinA * d_bottom
        endY = offsetY - sinA * d_right + cosA * d_bottom
        startX = endX - w
        startY = endY - h

        # Map from network-input coordinates to original-image pixels and
        # truncate to int once, after scaling, to avoid double rounding.
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        boxes.append((startX, startY, endX, endY))


# Non-maximum suppression (NMS): prune redundant, heavily overlapping boxes.
def non_max_suppression(boxes, overlapThresh=0.3):
    """Greedily filter overlapping boxes and return the survivors.

    Boxes are visited starting from the one with the largest bottom edge
    (``y2``). Each visited box is kept, and every remaining box whose
    intersection with it covers more than ``overlapThresh`` of that remaining
    box's own area is discarded. Note the ratio is intersection over the
    candidate's area, not a symmetric intersection-over-union.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        overlapThresh: Overlap ratio above which a box is suppressed.

    Returns:
        ``[]`` for empty input; otherwise a list of rows taken from the array
        built from ``boxes``, in the order in which they were kept.
    """
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes)
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]

    # Coordinates are treated as inclusive pixel indices, hence the +1.
    areas = (right - left + 1) * (bottom - top + 1)

    # Candidates ordered by bottom edge; the last entry is processed first.
    order = np.argsort(bottom)
    kept = []

    while len(order) > 0:
        tail = len(order) - 1
        current = order[tail]
        kept.append(current)
        others = order[:tail]

        # Intersection rectangle of the current box with every other candidate.
        inter_left = np.maximum(left[current], left[others])
        inter_top = np.maximum(top[current], top[others])
        inter_right = np.minimum(right[current], right[others])
        inter_bottom = np.minimum(bottom[current], bottom[others])

        inter_w = np.maximum(0, inter_right - inter_left + 1)
        inter_h = np.maximum(0, inter_bottom - inter_top + 1)

        # Fraction of each candidate's own area covered by the current box.
        ratios = (inter_w * inter_h) / areas[others]

        # Remove the current box plus every candidate above the threshold.
        drop = np.concatenate(([tail], np.flatnonzero(ratios > overlapThresh)))
        order = np.delete(order, drop)

    return [boxes[k] for k in kept]

# Run non-maximum suppression to discard overlapping boxes.
boxes = non_max_suppression(boxes, overlapThresh=0.4)

# Outline every surviving box on the untouched copy of the input image
# (colour (0, 255, 0), line thickness 2).
for box in boxes:
    startX, startY, endX, endY = box
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

# Write the annotated image to disk; imwrite's boolean result is the cell output.
cv2.imwrite('detected_img.jpg', orig)


True