### 生成验证码

In [1]:
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import shutil
import string
import os
import random

def random_color():
    '''Return a random colour as an (r, g, b) tuple, each channel in 0..255.'''
    return tuple(random.randint(0, 255) for _ in range(3))

def random_xy(width=132, height=40):
    '''Return a random pixel coordinate (x, y) inside the image, used for noise.

    Args:
        width: image width in pixels (defaults to the 132 px CAPTCHA width).
        height: image height in pixels (defaults to the 40 px CAPTCHA height).

    Returns:
        A tuple ``(x, y)`` with ``0 <= x < width`` and ``0 <= y < height``.
    '''
    # random.randint includes both end points, so the largest valid pixel
    # index is size - 1; using the size itself would land outside the image.
    x = random.randint(0, width - 1)
    y = random.randint(0, height - 1)
    return x, y

def random_str():
    '''Pick one random character from the 10 digits plus the 26 uppercase letters.'''
    alphabet = string.digits + string.ascii_uppercase
    return random.choice(alphabet)

class CreatImage:
    '''Generate a CAPTCHA data set of images 132 px wide and 40 px high.

    Images are saved into ``images/train`` under the name of the four
    characters they show; ``divice`` then moves ``test_num`` of them into
    ``images/test``.
    '''

    def __init__(self):
        self.width = 132  # image width in pixels
        self.height = 40  # image height in pixels
        self.train_num = 5000  # number of training images
        self.test_num = 1000  # number of test images
        self.font_file = 'C:/Windows/Fonts/simhei.ttf'  # TrueType font file
        self.base_path = 'images/'
        self.train_path = os.path.join(self.base_path, 'train')
        self.test_path = os.path.join(self.base_path, 'test')
        os.makedirs(self.train_path, exist_ok=True)
        os.makedirs(self.test_path, exist_ok=True)

    def divice(self):
        '''Move ``test_num`` randomly chosen images from the train folder to the test folder.

        Raises:
            ValueError: if the train folder holds fewer than ``test_num``
                files (raised by ``random.sample`` before anything is moved).
        '''
        # List the folder once and sample without replacement instead of
        # re-listing the whole directory for every single move.
        candidates = os.listdir(self.train_path)
        for img in random.sample(candidates, self.test_num):
            shutil.move(f'{self.train_path}/{img}', f'{self.test_path}/{img}')

    def creat_img(self, howmany):
        '''Draw ``howmany`` CAPTCHA images and save them into the train folder.

        Each image gets two random coloured lines, 1000 random coloured noise
        points and four random black characters. The file is named after the
        four characters, so an image with the same text overwrites an
        earlier one.
        '''
        # The font does not change between images, so load it only once.
        font = ImageFont.truetype(self.font_file, size=30)
        for _ in range(howmany):
            image = Image.new('RGB', (self.width, self.height), (250, 250, 250))
            draw = ImageDraw.Draw(image)
            # Interference lines running from the left edge to the right edge.
            for _line in range(2):
                y1 = random.randint(0, self.height)
                y2 = random.randint(0, self.height)
                draw.line((0, y1, self.width, y2), fill=random_color(), width=3)
            # Noise points; randint is inclusive, hence the "- 1" to stay
            # inside the image whatever width/height are configured.
            for _point in range(1000):
                xy = (random.randint(0, self.width - 1),
                      random.randint(0, self.height - 1))
                draw.point(xy, fill=random_color())
            # Text: one character every 30 px with a small vertical jitter.
            temp = []
            for i in range(4):
                random_char = random_str()
                y = random.randint(0, 6)
                draw.text((15 + i * 30, y), random_char, (0, 0, 0), font=font)
                temp.append(random_char)
            valid_str = "".join(temp)
            image.save(f'{self.train_path}/{valid_str}.png')

    def main(self):
        '''Generate images until the train folder holds train_num + test_num files, then split.

        Duplicate texts overwrite each other, so the folder is re-counted
        after every batch and only the missing amount is generated again.
        '''
        target = self.train_num + self.test_num
        while True:
            howmany = target - len(os.listdir(self.train_path))
            # "<=" rather than "==": a folder that already holds more files
            # than the target must stop the loop instead of spinning forever.
            if howmany <= 0:
                break
            self.creat_img(howmany)
        self.divice()

if __name__ == '__main__':
    # Build the whole data set and split off the test images.
    CreatImage().main()

### 建立CNN神经网络进行训练以及测试

In [2]:
from torch.nn import Module
from torch.nn import Sequential
from torch.nn import Conv2d
from torch.nn import BatchNorm2d
from torch.nn import Dropout
from torch.nn import ReLU
from torch.nn import MaxPool2d
from torch.nn import Linear
import string
import os

# Data-set locations: the train and test sub-folders of the image root.
path_image = 'images/'
path_train, path_test = (os.path.join(path_image, sub) for sub in ('train', 'test'))

# CAPTCHA geometry: characters per image, then image height and width in pixels.
captcha_number, image_height, image_width = 4, 40, 132

# Character -> class index; the ten digits come first, then the uppercase letters.
all_str = {char: idx for idx, char in enumerate(string.digits + string.ascii_uppercase)}

class CNNModel(Module):
    '''CNN for CAPTCHA recognition: three conv/pool stages and two linear layers.

    The first convolution takes a single input channel. Every stage ends in
    a 2x2 max-pool, so the three stages shrink height and width by a factor
    of 8 (floor division), which is what sizes the first linear layer. The
    output has ``captcha_number * len(all_str)`` values, one block of class
    scores per character position.
    '''

    def __init__(self):
        super().__init__()

        # Hyper-parameters shared by all convolutional stages.
        self.pool = 2  # max-pool window
        self.padding = 1  # zero-padding added on each side
        self.dropout = 0.2  # dropout probability
        self.kernel_size = 3  # 3x3 convolution kernels

        # Convolution + pooling stages: 1 -> 32 -> 64 -> 128 channels.
        self.layer1 = self._conv_stage(1, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.layer3 = self._conv_stage(64, 128)

        # Fully connected head.
        flat_features = (image_width // 8) * (image_height // 8) * 128
        self.fc = Sequential(
            Linear(flat_features, 1024),
            Dropout(self.dropout),
            ReLU())
        self.rfc = Sequential(Linear(1024, captcha_number * len(all_str)))

    def _conv_stage(self, in_channels, out_channels):
        '''Build one Conv2d -> BatchNorm2d -> Dropout -> ReLU -> MaxPool2d stage.'''
        return Sequential(
            Conv2d(in_channels, out_channels,
                   kernel_size=self.kernel_size, padding=self.padding),
            BatchNorm2d(out_channels),
            Dropout(self.dropout),
            ReLU(),
            MaxPool2d(self.pool))

    def forward(self, x):
        '''Run the three stages, flatten per sample, then apply both linear layers.'''
        features = x
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        flat = features.view(features.size(0), -1)
        return self.rfc(self.fc(flat))

In [3]:
import numpy as np
import string

# Number of characters in one CAPTCHA.
captcha_number = 4
# Character -> class index for the 10 digits followed by the 26 uppercase letters.
all_str = {char: idx for idx, char in enumerate(string.digits + string.ascii_uppercase)}

def one_hot_encode(value: list) -> tuple:
    '''Encode CAPTCHA characters as one concatenated one-hot vector.

    Every character position owns a slice of ``len(all_str)`` entries; the
    entry matching the character's class index is set to 1.

    Returns:
        ``(vector, order)``: the float vector of length
        ``captcha_number * len(all_str)`` and the list of indices that were
        set to 1 (the input expected by ``one_hot_decode``).
    '''
    classes = len(all_str)
    vector = np.zeros(captcha_number * classes, dtype=float)
    order = []
    for position, char in enumerate(value):
        hot = position * classes + all_str.get(char)
        vector[hot] = 1.0
        order.append(hot)
    return vector, order

def one_hot_decode(value: list) -> str:
    '''Decode a list of one-hot indices back into the character string.

    The i-th entry is reduced by ``i * len(all_str)`` to recover the class
    index of that position. An entry that matches no character contributes
    nothing to the result.
    '''
    classes = len(all_str)
    chars = []
    for position, hot in enumerate(value):
        target = hot - position * classes
        # Keep the "==" scan: the comparison also has to work for index
        # objects that would not match as dictionary keys.
        found = next((char for char, idx in all_str.items() if target == int(idx)), None)
        if found is not None:
            chars.append(found)
    return ''.join(chars)
    
if __name__ == '__main__':
    # Round-trip demo: encode a sample word, show both results, decode it again.
    vector, order = one_hot_encode('LOVE')
    print(
        f'独热码：{vector}',
        f'用于解码的列表：{order}',
        f'解码结果：{one_hot_decode(order)}',
        sep='\n',
    )

独热码：[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
用于解码的列表：[21, 60, 103, 122]
解码结果：LOVE


In [4]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import torch
import cv2
import os

class ImageDataSet(Dataset):
    '''Dataset of CAPTCHA images whose file name (without extension) is the label.

    Each item is ``(image, label, order)``: the preprocessed image tensor,
    the one-hot label as a tensor, and the list of one-hot indices returned
    by ``one_hot_encode``.
    '''

    def __init__(self, folder):
        # Passing the function itself (instead of wrapping it in a lambda)
        # keeps the transform picklable.
        self.transform = transforms.Compose([
            transforms.Lambda(process_img),
            transforms.ToTensor()
        ])
        # os.listdir order is arbitrary; sort for a reproducible index order.
        self.images = sorted(os.path.join(folder, name) for name in os.listdir(folder))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = self.transform(image_path)
        # Take the label from the file stem rather than a fixed [-8:-4]
        # slice, so it does not depend on a 4-character label or a
        # 3-character extension.
        captcha_text = os.path.splitext(os.path.basename(image_path))[0]
        # One-hot vector plus the list of character positions.
        vector, order = one_hot_encode(captcha_text)
        label = torch.from_numpy(vector)
        return image, label, order

def process_img(img_path: str) -> object:
    '''Load an image file and return it as a binarised single-channel PIL image.

    Args:
        img_path: path of the image file to read.

    Returns:
        A PIL image in mode 'L' whose pixels are either 0 or 255.

    Raises:
        OSError: if OpenCV cannot read ``img_path``.
    '''
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f'cannot read image: {img_path}')
    # imread delivers the channels in BGR order, so BGR2GRAY is the
    # conversion that applies the grey weights to the right channels.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binarise: pixels brighter than 20 become 255, all others become 0.
    binary = cv2.threshold(gray, 20, 255, cv2.THRESH_BINARY)[1]
    # A 2-D uint8 array converts directly to a mode 'L' image; no colour
    # round trip is needed to end up with one channel.
    return Image.fromarray(binary)

def loaders(folder: str, size: int) -> object:
    '''Return a shuffling DataLoader over the images in ``folder``.

    Args:
        folder: directory handed to ``ImageDataSet``.
        size: batch size.
    '''
    return DataLoader(ImageDataSet(folder), batch_size=size, shuffle=True)

In [5]:
from torch.nn import MultiLabelSoftMarginLoss
from torch.autograd import Variable
from torch.optim import Adam
import logging
logging.basicConfig(level=logging.INFO)

# Character -> class index; digits first, then uppercase letters.
all_str = {char: idx for idx, char in enumerate(string.digits + string.ascii_uppercase)}

# Image folders: train and test live under the common image root.
path_image = 'images/'
path_train, path_test = (os.path.join(path_image, sub) for sub in ('train', 'test'))

# Image specification: characters per CAPTCHA, height and width in pixels.
captcha_number, image_height, image_width = 4, 40, 132

# Training parameters: passes over the data, samples per batch, learning rate.
epochs, batch_size, rate = 5, 25, 0.001
# File the trained weights are saved to and loaded from.
model_name = 'result.pkl'

def train_model():
    '''Train ``CNNModel`` on the images in ``path_train`` and save its weights.

    Runs ``epochs`` passes over the training loader with Adam and
    ``MultiLabelSoftMarginLoss``. Every 100 steps the loss is logged and the
    state dict is written to ``model_name``; it is written once more when
    training finishes.
    '''
    # Use the GPU when one is available and fall back to the CPU otherwise,
    # so the same code runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNNModel().to(device)
    model.train()  # training mode (enables dropout / batch-norm updates)
    logging.info('Train start')
    # Loss function
    criterion = MultiLabelSoftMarginLoss()
    # Adam optimiser
    optimizer = Adam(model.parameters(), lr=rate)
    ids = loaders(path_train, batch_size)
    logging.info('Iteration is %s', len(ids))
    for epoch in range(epochs):
        # Count steps from 1 so "step % 100" fires after the 100th batch.
        for step, (image, label, _order) in enumerate(ids, start=1):
            images = image.to(device)
            labels = label.float().to(device)
            predict_labels = model(images)
            loss = criterion(predict_labels, labels)
            # Clear old gradients, back-propagate, then update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 100 == 0:
                logging.info("epoch:%s, step:%s, loss:%s", epoch + 1, step, loss.item())
                # Checkpoint the weights trained so far.
                torch.save(model.state_dict(), model_name)
    # Save the final weights.
    torch.save(model.state_dict(), model_name)
    logging.info('Train done')
# Start training only when this code runs as the main program.
if __name__ == '__main__':
    train_model()

INFO:root:Train start
INFO:root:Iteration is 200
INFO:root:epoch:1, step:100, loss:0.11154153943061829
INFO:root:epoch:1, step:200, loss:0.07168767601251602
INFO:root:epoch:2, step:100, loss:0.03714725747704506
INFO:root:epoch:2, step:200, loss:0.026876429095864296
INFO:root:epoch:3, step:100, loss:0.014294213615357876
INFO:root:epoch:3, step:200, loss:0.012000598944723606
INFO:root:epoch:4, step:100, loss:0.007509223185479641
INFO:root:epoch:4, step:200, loss:0.008324207738041878
INFO:root:epoch:5, step:100, loss:0.0068277632817626
INFO:root:epoch:5, step:200, loss:0.009294134564697742
INFO:root:Train done


### 测试模型准确率

In [6]:
def test_model():
    '''Evaluate the saved model on the images in ``path_test`` and log the accuracy.

    Loads the weights from ``model_name``, predicts every test image one at
    a time, logs each wrong prediction and finally logs the number of images
    and the accuracy in percent.
    '''
    # Use the GPU when one is available and fall back to the CPU otherwise.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNNModel().to(device)
    model.eval()  # inference mode
    # map_location lets weights that were saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_name, map_location=device))
    logging.info('load cnn model')
    verifies = loaders(path_test, 1)
    correct, total, cha_len = 0, 0, len(all_str)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for image, _label, order in verifies:
            # The loader batches the indices; turn each back into a plain int.
            captcha = one_hot_decode([int(index) for index in order])  # true text
            predict_label = model(image.to(device))
            # One row of class scores per character position; take the best class.
            best = predict_label[0].reshape(captcha_number, cha_len).argmax(dim=1)
            predict = ''.join(one_hot_decode([int(index)]) for index in best)
            total += 1
            if predict == captcha:
                correct += 1
            else:
                logging.info('Fail, captcha:%s->%s', captcha, predict)
    if total == 0:
        # Avoid a ZeroDivisionError when the test folder is empty.
        logging.warning('No test images found in %s', path_test)
        return
    logging.info(f'完成。总预测图片数为{total}张，准确率为{int(100 * correct / total)}%')

# Run the accuracy test only when this code runs as the main program.
if __name__ == '__main__':
    test_model()

INFO:root:load cnn model
INFO:root:Fail, captcha:0X3W->OX3W
INFO:root:Fail, captcha:QH5L->QH5I
INFO:root:Fail, captcha:UFMN->UFMI
INFO:root:完成。总预测图片数为1000张，准确率为99%


### 识别验证码

In [7]:
def predict_model(img_path: str) -> str:
    '''Recognise the CAPTCHA text in a single image file.

    Args:
        img_path: path of the CAPTCHA image.

    Returns:
        The predicted string, one character per CAPTCHA position.

    Raises:
        FileNotFoundError: if ``img_path`` is not an existing file.
    '''
    if not os.path.isfile(img_path):
        raise FileNotFoundError(img_path)
    # Use the GPU when one is available and fall back to the CPU otherwise.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNNModel().to(device)
    model.eval()  # inference mode
    # map_location lets weights that were saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_name, map_location=device))
    transform = transforms.Compose([transforms.Lambda(process_img), transforms.ToTensor()])
    # The transform yields a (1, height, width) tensor; add the batch axis
    # directly instead of opening the file a second time just to read its size.
    img = transform(img_path).unsqueeze(0)
    # No gradients are needed for inference.
    with torch.no_grad():
        predict_label = model(img.to(device))
    # One row of class scores per character position; take the best class.
    best = predict_label[0].reshape(captcha_number, len(all_str)).argmax(dim=1)
    return ''.join(one_hot_decode([int(index)]) for index in best)
if __name__ == '__main__':
    # Demo: recognise one sample image and print the predicted text.
    print(f'识别结果为：{predict_model("LOVE.png")}')

识别结果为：LOVE
