## 机器自动识别验证码

#### 识别思路：
1. 灰度处理
2. 二值化
3. 去除边框
4. 降噪
5. 切割字符或者倾斜度矫正
6. 训练字体库
7. 识别

#### 1.灰度处理&二值化

In [1]:
#自适应阈值二值化。

import os
import cv2
def _get_dynamic_binary_image(filedir, img_name):
    '''
    Load an image, convert it to grayscale and binarize it with an
    adaptive (Gaussian) threshold.

    The binarized image is also written to
    ``./out_img/<name before the first dot>_binary.jpg``.

    :param filedir: directory that contains the image
    :param img_name: file name of the image inside ``filedir``
    :return: the binarized image as returned by ``cv2.adaptiveThreshold``
    :raises IOError: if the image cannot be read
    '''
    filename = './out_img/' + img_name.split('.')[0] + '_binary.jpg'
    img_name = filedir + '/' + img_name
    im = cv2.imread(img_name)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise IOError('cannot read image: ' + img_name)
    # Only announce the read once it has actually succeeded.
    print(img_name + ' has been read!')
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)  # grayscale conversion
    # Block size 21 and constant 1 are the values used by the original code.
    th = cv2.adaptiveThreshold(im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1)
    cv2.imwrite(filename, th)
    return th

#### 2.去除边框

In [4]:
#去除边框
#OpenCV内图片的矩阵点是反过来的，就是长和宽是颠倒的。
def clear_border(img, img_name, border=2):
    '''
    Paint a frame of ``border`` pixels around the image white (255).

    The image is modified in place, written to
    ``./out_img/<name before the first dot>_clearBorder.jpg`` and returned.

    Note that the image matrix is indexed as ``img[row, column]``, i.e. the
    first index runs over the height and the second over the width.

    :param img: image array exposing ``shape`` and slice assignment
        (presumably a NumPy array produced by OpenCV)
    :param img_name: original file name, used to build the output name
    :param border: thickness of the frame to whiten, in pixels (default 2)
    :return: the modified image
    '''
    filename = './out_img/' + img_name.split('.')[0] + '_clearBorder.jpg'
    if border > 0:
        # Guarded because a slice such as img[-0:] would select everything.
        img[:border, :] = 255    # top rows
        img[-border:, :] = 255   # bottom rows
        img[:, :border] = 255    # left columns
        img[:, -border:] = 255   # right columns
    # Save and return exactly once, after the whole frame has been cleared.
    cv2.imwrite(filename, img)
    return img

#### 3.降噪

In [5]:
# 1.干扰线降噪
def interference_line(img, img_name):
    '''
    Thin out interference lines by whitening weakly supported pixels.

    For every pixel that is not on the image edge, the four direct
    neighbours (up, down, left, right) are inspected. If more than two of
    them are brighter than 245, the pixel itself is set to 255. The scan is
    done in place, so pixels whitened earlier influence later decisions.

    The result is written to
    ``./out_img/<name before the first dot>_interferenceline.jpg``.

    :param img: single-channel image indexed as ``img[row, column]``
        (assumed grayscale/binary; the comparisons need scalar pixels)
    :param img_name: original file name, used to build the output name
    :return: the modified image
    '''
    filename = './out_img/' + img_name.split('.')[0] + '_interferenceline.jpg'
    h, w = img.shape[:2]
    for y in range(1, w - 1):
        for x in range(1, h - 1):
            neighbours = (img[x, y - 1], img[x, y + 1], img[x - 1, y], img[x + 1, y])
            white_count = 0
            for value in neighbours:
                if value > 245:
                    white_count += 1
            if white_count > 2:
                img[x, y] = 255
    # Save and return once the whole image has been scanned (not after the
    # first column).
    cv2.imwrite(filename, img)
    return img

In [7]:
#点降噪
def interference_point(img, img_name, x=0, y=0):
    '''
    Blacken pixels whose 3x3 neighbourhood is predominantly dark.

    For each pixel the values of all cells in the 3x3 window centred on it
    (clipped to the image, so 4 cells at a corner, 6 on an edge, 9 in the
    interior) are summed, the centre pixel included. If the sum is at most
    ``(cells // 2) * 245`` -- i.e. 2*245, 3*245 and 4*245 respectively --
    the pixel is set to 0. The scan runs column by column and modifies the
    image in place, so earlier changes affect later windows.

    The result is written to
    ``./out_img/<name before the first dot>_interferencepoint.jpg``.

    :param img: single-channel image indexed as ``img[row, column]``
    :param img_name: original file name, used to build the output name
    :param x: unused; kept only so existing callers keep working
    :param y: unused; kept only so existing callers keep working
    :return: the modified image
    '''
    filename = './out_img/' + img_name.split('.')[0] + '_interferencepoint.jpg'
    height, width = img.shape[:2]
    # Iterate over every column and row, including the last ones, so that the
    # edge and corner cases are actually reached.
    for col in range(width):
        col_lo = max(col - 1, 0)
        col_hi = min(col + 1, width - 1)
        for row in range(height):
            row_lo = max(row - 1, 0)
            row_hi = min(row + 1, height - 1)
            total = 0
            cells = 0
            for r in range(row_lo, row_hi + 1):
                for c in range(col_lo, col_hi + 1):
                    # int() avoids overflow of small unsigned pixel types;
                    # the centre pixel is read fresh for every position.
                    total += int(img[r, c])
                    cells += 1
            if total <= (cells // 2) * 245:
                img[row, col] = 0
    cv2.imwrite(filename, img)
    return img

#### 4.字符切割

　　字符切割的思路就是找到一个黑色的点，然后再遍历与它相邻的黑色的点，直到遍历完所有连接起来的黑色的点，找出这些点中最高的点、最低的点、最右边的点、最左边的点，记录下这四个点，认为这是一个字符，然后再向后遍历点，直至找到黑色的点，继续以上的步骤。最后通过每个字符的四个点进行切割。

In [None]:
#用队列和集合记录遍历过的像素坐标代替单纯递归以解决cfs访问过深问题
def cfs(im, x_fd, y_fd):
    '''
    Flood-fill the 4-connected region of zero-valued pixels around a seed
    and return its bounding box.

    An explicit queue plus a visited set is used instead of recursion so
    that large regions cannot exhaust the recursion limit.

    :param im: single-channel image indexed as ``im[first, second]``
    :param x_fd: first index (row) of the seed pixel
    :param y_fd: second index (column) of the seed pixel
    :return: ``(xmax, xmin, ymax, ymin)`` where ``x`` refers to the first
        index and ``y`` to the second. If no other zero pixel is connected
        to the seed, ``(x_fd + 1, x_fd, y_fd + 1, y_fd)`` is returned.
    '''
    from collections import deque  # local import keeps this block self-contained

    height, width = im.shape[:2]
    # The seed belongs to the region, so it takes part in the extents.
    xmin = xmax = x_fd
    ymin = ymax = y_fd
    region_size = 1
    visited = {(x_fd, y_fd)}
    pending = deque([(x_fd, y_fd)])
    offsets = ((1, 0), (0, 1), (-1, 0), (0, -1))  # 4-neighbourhood
    while pending:
        x, y = pending.popleft()
        for xoffset, yoffset in offsets:
            x_neighbor, y_neighbor = x + xoffset, y + yoffset
            # Explicit bounds check: a negative index would not raise but
            # wrap around to the opposite edge of the image.
            if not (0 <= x_neighbor < height and 0 <= y_neighbor < width):
                continue
            if (x_neighbor, y_neighbor) in visited:
                continue  # already handled
            visited.add((x_neighbor, y_neighbor))
            if im[x_neighbor, y_neighbor] != 0:
                continue
            region_size += 1
            xmin = min(xmin, x_neighbor)
            xmax = max(xmax, x_neighbor)
            ymin = min(ymin, y_neighbor)
            ymax = max(ymax, y_neighbor)
            pending.append((x_neighbor, y_neighbor))
    if region_size == 1:
        # Isolated seed: keep the historical one-pixel-wide fallback box.
        return x_fd + 1, x_fd, y_fd + 1, y_fd
    return xmax, xmin, ymax, ymin

def detectFgPix(im, xmax):
    '''
    Locate the starting pixel of the next region.

    Columns are scanned from ``xmax + 1`` up to the last column; inside each
    column the rows are checked from the top. The first pixel equal to 0 is
    reported.

    :param im: image indexed as ``im[row, column]``
    :param xmax: column index after which the search starts
    :return: ``(row, column)`` of the first zero pixel, or ``None`` when no
        such pixel exists in the scanned columns
    '''
    row_count, col_count = im.shape[:2]
    for column in range(xmax + 1, col_count):
        first_dark_row = next(
            (row for row in range(row_count) if im[row, column] == 0),
            None,
        )
        if first_dark_row is not None:
            return first_dark_row, column
    return None

def CFS(im, max_zones=10):
    '''
    Locate character regions from left to right.

    Repeatedly finds the next zero-valued pixel to the right of the previous
    region with ``detectFgPix`` and measures the connected region around it
    with ``cfs``. Stops when no further pixel is found or after
    ``max_zones`` regions.

    ``cfs`` reports extents as ``(first-index max, first-index min,
    second-index max, second-index min)``. The second index is the column
    (horizontal position), so it is the one used for the widths and for
    advancing the search.

    :param im: single-channel image indexed as ``im[row, column]``
    :param max_zones: upper bound on the number of regions collected
        (default 10)
    :return: ``(zoneL, zoneWB, zoneHB)`` -- region widths, horizontal
        ``[start, end]`` pairs and vertical ``[start, end]`` pairs
    '''
    zoneL = []   # width of each region
    zoneWB = []  # horizontal [start, end] of each region
    zoneHB = []  # vertical [start, end] of each region
    # NOTE(review): the search begins at last_col + 1, so column 0 itself is
    # never scanned; confirm this is intended (e.g. border already cleared).
    last_col = 0
    for _ in range(max_zones):
        start = detectFgPix(im, last_col)
        if start is None:
            break  # no more dark pixels to the right
        x_fd, y_fd = start
        row_max, row_min, col_max, col_min = cfs(im, x_fd, y_fd)
        zoneL.append(col_max - col_min)
        zoneWB.append([col_min, col_max])
        zoneHB.append([row_min, row_max])
        last_col = col_max
    # Always return the collected zones, including when max_zones is reached.
    return zoneL, zoneWB, zoneHB
        