## 一、导入包以及设置随机种子

In [1]:
# import os
# root = os.getcwd()               #获得当前路径 /home/dir1
# print (root)
# !cd r "D:\Learning_materials\CS\zero_one\Jacob's_ladder\Research_teaching_team\Code\ocr_start\captcha_ocr"

In [2]:
import numpy as np
import pandas as pd
# from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import time

import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision  import transforms


from PIL import Image


import random
# Seed every random number generator this file uses so that runs are repeatable.
seed = 42
torch.manual_seed(seed)  # PyTorch generator
np.random.seed(seed)  # NumPy global generator
random.seed(seed)  # Python standard-library generator



## 二、以类的方式定义超参数



In [3]:
class argparse():
    """Plain container for the hyper-parameters shared by the cells below.

    NOTE: the class name is the same as the standard-library ``argparse``
    module; it is kept unchanged so existing references keep working.
    """

    def __init__(self) -> None:
        # Captcha format: number of characters and the alphabet they are drawn from.
        self.captcha_size = 4
        self.captcha_array = "0123456789abcdefghijklmnopqrstuvwxyz"

        # Optimisation settings.
        self.batch_size = 512
        self.lr = 0.001
        self.epochs = 20

        # Use the first CUDA device when one is available, otherwise the CPU.
        device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)


# Single shared settings instance read by the models, dataset and training code.
args = argparse()




## 三、定义自己的模型



In [4]:
class Mymodel(nn.Module):
    """Plain CNN classifier: four conv/ReLU/max-pool stages and a two-layer head.

    The head emits ``args.captcha_size * len(args.captcha_array)`` logits, i.e.
    one score per (character position, alphabet symbol) pair.
    """

    def __init__(self):
        super().__init__()

        n_outputs = args.captcha_size * len(args.captcha_array)

        # Each stage keeps the spatial size in the convolution (3x3, padding 1)
        # and halves it in the pooling layer.
        stages = []
        for c_in, c_out in ((1, 64), (64, 128), (128, 256), (256, 512)):
            stages.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ])
        stages.append(nn.Flatten())
        self.seq = nn.Sequential(*stages)

        # 15360 = 512 channels * 3 * 10, which is what four poolings leave of a
        # 60x160 input (the size My_datasets resizes to) -- TODO confirm if the
        # input size ever changes.
        self.layer = nn.Sequential(
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.2),  # drop 20% of the activations
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_outputs),
        )

    def forward(self, x):
        """Return the flat logit vector for the image batch ``x``."""
        return self.layer(self.seq(x))


In [5]:
class myResNet(nn.Module):
    """ResNet-50 with a 1-channel stem and a captcha-sized output layer.

    The first convolution is replaced so the network accepts single-channel
    images, and the final fully connected layer is replaced so it emits
    ``args.captcha_size * len(args.captcha_array)`` logits.
    """

    def __init__(self) -> None:
        super().__init__()

        n_outputs = args.captcha_size * len(args.captcha_array)

        # NOTE(review): newer torchvision releases may prefer ``weights=None``
        # over ``pretrained=False`` -- confirm against the installed version.
        backbone = models.resnet50(pretrained=False)
        backbone.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        backbone.fc = nn.Linear(in_features=2048, out_features=n_outputs, bias=True)
        self.model = backbone

    def forward(self, x):
        """Return the flat logit vector for the image batch ``x``."""
        return self.model(x)



## 四、定义自己的数据集Dataset,DataLoader



In [6]:
class Tools():
    """Helpers for converting captcha text to/from one-hot form and counting hits."""

    def text2vec(self, text):
        """Return a one-hot matrix for ``text``.

        The result has ``args.captcha_size`` rows and ``len(args.captcha_array)``
        columns; row ``i`` has a 1 in the column of the i-th character.
        """
        alphabet = args.captcha_array
        one_hot = torch.zeros((args.captcha_size, len(alphabet)))
        for row, char in enumerate(text):
            one_hot[row, alphabet.index(char)] = 1
        return one_hot

    def vec2text(self, vec):
        """Return the string obtained by taking the arg-max symbol of every row."""
        alphabet = args.captcha_array
        best_columns = torch.argmax(vec, dim=1).tolist()
        return ''.join(alphabet[column] for column in best_columns)

    def accuracy(self, y_hat, y):
        """Return the number of correct predictions as a float.

        When ``y_hat`` has more than one column it is reduced to class indices
        with an arg-max over axis 1 before being compared with ``y``.
        """
        has_class_scores = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
        if has_class_scores:
            y_hat = y_hat.argmax(axis=1)
        hits = (y_hat.type(y.dtype) == y).type(y.dtype)
        return float(hits.sum())

# Shared helper instance used by the dataset and by the train/test functions.
tls = Tools()
# Round-trip check: encoding 'aab1' and decoding it again.
tls.vec2text(tls.text2vec('aab1'))

'aab1'

In [7]:
# Check that len(x) and x.__len__() report the same alphabet size.
len(args.captcha_array) == args.captcha_array.__len__()

True

In [8]:

class My_datasets(Dataset):
    """Captcha image dataset read from a single flat directory.

    Every entry returned by ``os.listdir(root_dir)`` is treated as one sample.
    The label is the part of the file name before the first underscore.

    Args:
        root_dir: Directory that contains the image files.
    """

    def __init__(self, root_dir):
        super().__init__()
        self.list_image_path = [os.path.join(root_dir, image_path) for image_path in os.listdir(root_dir)]

        # Resize to 60x160, collapse to one channel, convert to a tensor.
        self.transforms = transforms.Compose(
            [
                transforms.Resize((60, 160)),
                transforms.Grayscale(),
                transforms.ToTensor(),
            ]
        )

    @staticmethod
    def _label_from_path(image_path):
        """Return the captcha text encoded in the file name of ``image_path``.

        ``os.path.basename`` is used instead of splitting on '/' so the result
        is also right when the path was joined with the platform's separator.
        """
        return os.path.basename(image_path).split('_')[0]

    def __getitem__(self, index):
        """Return ``(image_tensor, flat_one_hot_label)`` for sample ``index``."""
        image_path = self.list_image_path[index]

        # The transforms run inside the ``with`` block so the pixel data is read
        # before the underlying file is closed.
        with Image.open(image_path) as img_:
            img_tensor = self.transforms(img_)

        img_label = tls.text2vec(self._label_from_path(image_path))
        # Flatten the (positions, symbols) one-hot matrix into a single vector.
        img_label = img_label.view(-1)

        return img_tensor, img_label

    def __len__(self):
        """Return the number of files found in the directory."""
        return len(self.list_image_path)

In [11]:
def train_original():
    """Train ``myResNet`` on ``./dataset/train/`` and checkpoint on new minima.

    For every epoch the smallest batch loss seen so far in that epoch is
    tracked. Each time a batch beats it, the weights are written to
    ``afterResNetmodel.pth`` and ``total_step`` is incremented. A
    "save model" message is printed only when ``total_step`` is a multiple of
    ``save_step``; after each message ``save_step`` shrinks by 20, down to a
    floor of 10. The epoch's minimum batch loss is printed at the end of
    every epoch.

    Returns:
        None.
    """
    train_path = r"./dataset/train/"

    train_dataset = My_datasets(train_path)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    loss_fn = nn.MultiLabelSoftMarginLoss().to(args.device)
    model = myResNet().to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    total_step = 0
    save_step = 100

    for epoch in range(args.epochs):
        # Kept as a plain float so no tensor is held across iterations and the
        # final print works even when the loader yields no batch.
        min_loss = float("inf")
        for imgs, targets in train_dataloader:
            imgs = imgs.to(args.device)
            targets = targets.to(args.device)

            outputs = model(imgs)
            loss = loss_fn(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            if loss_value < min_loss:
                min_loss = loss_value
                total_step += 1
                if total_step % save_step == 0:
                    save_step = max(save_step - 20, 10)
                    print("save model {}".format(total_step))
                # Saved on every new epoch minimum, not only when the message prints.
                torch.save(model.state_dict(), "afterResNetmodel.pth")

        print("epoch{}, loss:{}".format(epoch, min_loss))
    

    #         if i % 10000 == 0:
    #             total_step+=1
    #             print("训练{}次,loss:{}".format((total_step-1)*10000, loss.item()))
    # torch.save(model.state_dict(),"afterResNetmodel.pth")


In [None]:
def train_val():
    """Train ``myResNet`` on ``./dataset/train/`` and checkpoint on new minima.

    No validation pass is executed by this function. Within each epoch the
    smallest batch loss seen so far is tracked; whenever a batch beats it, the
    weights are written to ``afterResNetmodel.pth`` and ``total_step`` is
    incremented. A "save model" message is printed only when ``total_step`` is
    a multiple of ``save_step``, after which ``save_step`` shrinks by 20 down
    to a floor of 10. The epoch's minimum batch loss is printed after every
    epoch.

    Returns:
        None.
    """
    train_path = r"./dataset/train/"

    train_dataset = My_datasets(train_path)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    loss_fn = nn.MultiLabelSoftMarginLoss().to(args.device)
    model = myResNet().to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    total_step = 0
    save_step = 100

    for epoch in range(args.epochs):
        # A plain float: nothing from the autograd graph is kept alive, and the
        # epoch summary below cannot fail when the loader is empty.
        min_loss = float("inf")
        for imgs, targets in train_dataloader:
            imgs = imgs.to(args.device)
            targets = targets.to(args.device)

            outputs = model(imgs)
            loss = loss_fn(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            if loss_value < min_loss:
                min_loss = loss_value
                total_step += 1
                if total_step % save_step == 0:
                    save_step = max(save_step - 20, 10)
                    print("save model {}".format(total_step))
                # The checkpoint is rewritten on every new epoch minimum.
                torch.save(model.state_dict(), "afterResNetmodel.pth")

        print("epoch{}, loss:{}".format(epoch, min_loss))

        # with torch.no_grad():
        #     model.eval()
        #     test_data = My_datasets("./dataset/test/")

        #     test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)
        #     test_length = test_data.__len__()
        #     correct = 0;
        #     for i, (imgs, lables) in enumerate(test_dataloader):
        #         imgs = imgs.to(args.device)
        #         lables = lables.to(args.device)

        #         lables = lables.view(-1, args.captcha_array.__len__())

        #         lables_text = tls.vec2text(lables)
        #         predict_outputs = model(imgs)
        #         predict_outputs = predict_outputs.view(-1, args.captcha_array.__len__())
        #         predict_labels = tls.vec2text(predict_outputs)
        #         if predict_labels == lables_text:
        #             correct += 1
        #             # print("预测正确：正确值:{},预测值:{}".format(lables_text, predict_labels))
        #         else:
        #             print("预测失败:正确值:{},预测值:{}".format(lables_text, predict_labels))
        #         # m(imgs)
            
        #     cor_rate = correct / test_length * 100
        #     # if cor_rate
        #     print("正确率{}".format(cor_rate))
    

    #         if i % 10000 == 0:
    #             total_step+=1
    #             print("训练{}次,loss:{}".format((total_step-1)*10000, loss.item()))
    # torch.save(model.state_dict(),"afterResNetmodel.pth")


In [12]:
# Run the training loop defined in train_original().
train_original()

save model 1
save model 2
save model 3
save model 4
save model 5
save model 6
save model 7
save model 8
save model 9
save model 10
save model 11
save model 12
save model 13
save model 14
save model 15
save model 16
save model 17
save model 18
save model 19
save model 20
save model 21
save model 22
save model 23
epoch0, loss:0.12704098224639893
save model 24
save model 25
save model 26
save model 27
epoch1, loss:0.12682418525218964
save model 28
save model 29
save model 30
save model 31
save model 32
save model 33
save model 34
epoch2, loss:0.12642735242843628
save model 35
save model 36
save model 37
save model 38
save model 39
save model 40
save model 41
save model 42
save model 43
save model 44
epoch3, loss:0.12514029443264008
save model 45
save model 46
save model 47
save model 48
save model 49
save model 50
save model 51
save model 52
save model 53
save model 54
save model 55
save model 56
save model 57
save model 58
save model 59
save model 60
save model 61
save model 62
save mode

In [None]:
def train():
    """Train ``myResNet`` and report per-epoch loss and exact-match accuracy.

    Each epoch runs one optimisation pass over ``./dataset/train/`` followed by
    a gradient-free pass over ``./dataset/test/``. A captcha counts as correct
    only when all ``args.captcha_size`` characters are predicted correctly;
    accuracy is the percentage of correct captchas in the respective dataset.
    After the last epoch the final weights are written to ``af_bestmodel.pth``.

    Returns:
        None. Progress is reported with ``print`` and ``tqdm``.
    """
    train_path = r"./dataset/train/"
    test_path = r"./dataset/test/"

    train_dataset = My_datasets(train_path)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    val_dataset = My_datasets(test_path)
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=args.batch_size, shuffle=True)

    model = myResNet().to(args.device)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    n_classes = len(args.captcha_array)

    def run_epoch(dataloader, training):
        """Run one pass over ``dataloader``; return (correct captchas, mean loss)."""
        batch_losses = []
        correct = 0
        for inputs, label in tqdm(dataloader):
            inputs = inputs.to(args.device)
            # One row per character position, one column per alphabet symbol.
            label = label.to(args.device).view(-1, n_classes)
            outputs = model(inputs).view(-1, n_classes)

            loss = criterion(outputs, label)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            batch_losses.append(loss.item())

            # Regroup the per-character arg-max into one row per captcha and
            # count only the rows where every position matches.
            predicted = outputs.argmax(dim=1).view(-1, args.captcha_size)
            expected = label.argmax(dim=1).view(-1, args.captcha_size)
            correct += int((predicted == expected).all(dim=1).sum())
        return correct, np.average(batch_losses)

    # Per-epoch history; nothing inside this function reads it back.
    train_epochs_loss = []
    valid_epochs_loss = []
    train_acc = []
    val_acc = []

    for epoch in range(args.epochs):
        # =========================train=======================
        model.train()
        correct, mean_loss = run_epoch(train_dataloader, training=True)
        # max(..., 1) avoids a division by zero on an empty dataset.
        accuracy = 100 * correct / max(len(train_dataset), 1)
        train_epochs_loss.append(mean_loss)
        train_acc.append(accuracy)
        print("train acc = {:.3f}%, loss = {}".format(accuracy, mean_loss))

        # =========================val=========================
        model.eval()
        with torch.no_grad():
            correct, mean_loss = run_epoch(val_dataloader, training=False)
        accuracy = 100 * correct / max(len(val_dataset), 1)
        valid_epochs_loss.append(mean_loss)
        val_acc.append(accuracy)
        print("epoch = {}, valid acc = {:.2f}%, loss = {}".format(epoch, accuracy, mean_loss))

    torch.save(model.state_dict(), "af_bestmodel.pth")

In [13]:
def test_pred():
    """Evaluate the ``afterResNetmodel.pth`` checkpoint on ``./dataset/test/``.

    Images are fed one at a time. Every captcha whose decoded prediction
    differs from its label is printed, followed by the overall percentage of
    exact matches (0.0 when the directory holds no samples).

    Returns:
        None.
    """
    m = myResNet()
    # NOTE(review): strict=False silently ignores missing or unexpected keys in
    # the checkpoint -- confirm this leniency is intended.
    m.load_state_dict(torch.load("afterResNetmodel.pth", map_location=args.device), strict=False)

    m.to(args.device)
    m.eval()
    test_data = My_datasets("./dataset/test/")

    # batch_size=1 so each decoded string below is exactly one captcha.
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)
    test_length = len(test_data)
    n_classes = len(args.captcha_array)
    correct = 0

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for imgs, lables in test_dataloader:
            imgs = imgs.to(args.device)
            lables = lables.to(args.device).view(-1, n_classes)

            lables_text = tls.vec2text(lables)
            predict_outputs = m(imgs).view(-1, n_classes)
            predict_labels = tls.vec2text(predict_outputs)
            if predict_labels == lables_text:
                correct += 1
            else:
                print("预测失败:正确值:{},预测值:{}".format(lables_text, predict_labels))

    rate = correct / test_length * 100 if test_length else 0.0
    print("正确率{}".format(rate))
def pred_pic(pic_path):
    """Predict and print the captcha text of the image at ``pic_path``.

    The image is converted to grayscale, resized to 60x160 and passed through
    the model stored in ``model.pth``. The tensor shape before and after
    adding the batch dimension and the decoded text are printed.

    Args:
        pic_path: Path of the image file to classify.

    Returns:
        None.
    """
    to_tensor = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((60, 160)),
        transforms.ToTensor()
    ])
    # Convert inside the ``with`` block so the file is closed right afterwards.
    with Image.open(pic_path) as picture:
        img = to_tensor(picture).to(args.device)
    print(img.shape)
    img = torch.reshape(img, (-1, 1, 60, 160))
    print(img.shape)

    # SECURITY NOTE: torch.load unpickles the file; only load trusted checkpoints.
    # NOTE(review): the visible code only writes state_dicts -- confirm that
    # model.pth really holds a whole pickled model.
    # map_location lets a checkpoint saved on another device load here.
    m = torch.load("model.pth", map_location=args.device).to(args.device)
    # Switch to inference mode before predicting on a single image.
    m.eval()
    with torch.no_grad():
        outputs = m(img)
    outputs = outputs.view(-1, len(args.captcha_array))
    outputs_lable = tls.vec2text(outputs)
    print(outputs_lable)

In [14]:
# Evaluate the saved checkpoint on the test images.
test_pred()

预测失败:正确值:jbgl,预测值:ibgl
预测失败:正确值:w164,预测值:wz64
预测失败:正确值:g80u,预测值:880u
预测失败:正确值:t9eo,预测值:t980
预测失败:正确值:jln3,预测值:jln9
预测失败:正确值:k0g5,预测值:kog5
预测失败:正确值:etni,预测值:etnd
预测失败:正确值:oqhx,预测值:oqhh
预测失败:正确值:q045,预测值:qo45
预测失败:正确值:pdhb,预测值:pdbb
预测失败:正确值:17ig,预测值:12ig
预测失败:正确值:sz4c,预测值:5z4c
正确率94.0


In [None]:
def train():
    """Train ``myResNet`` and report per-epoch loss and exact-match accuracy.

    NOTE: this definition rebinds the name ``train``; unlike the earlier
    definition in this file it does not write a checkpoint.

    Each epoch runs one optimisation pass over ``./dataset/train/`` followed by
    a gradient-free pass over ``./dataset/test/``. A captcha counts as correct
    only when all ``args.captcha_size`` characters are predicted correctly;
    accuracy is the percentage of correct captchas in the respective dataset.

    Returns:
        None. Progress is reported with ``print`` and ``tqdm``.
    """
    train_path = "./dataset/train/"
    test_path = "./dataset/test/"

    train_dataset = My_datasets(train_path)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    val_dataset = My_datasets(test_path)
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=args.batch_size, shuffle=True)

    model = myResNet().to(args.device)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    n_classes = len(args.captcha_array)

    def run_epoch(dataloader, training):
        """Run one pass over ``dataloader``; return (correct captchas, mean loss)."""
        batch_losses = []
        correct = 0
        for inputs, label in tqdm(dataloader):
            inputs = inputs.to(args.device)
            # One row per character position, one column per alphabet symbol.
            label = label.to(args.device).view(-1, n_classes)
            outputs = model(inputs).view(-1, n_classes)

            loss = criterion(outputs, label)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            batch_losses.append(loss.item())

            # A captcha is a hit only when every one of its positions matches.
            predicted = outputs.argmax(dim=1).view(-1, args.captcha_size)
            expected = label.argmax(dim=1).view(-1, args.captcha_size)
            correct += int((predicted == expected).all(dim=1).sum())
        return correct, np.average(batch_losses)

    # Per-epoch history; nothing inside this function reads it back.
    train_epochs_loss = []
    valid_epochs_loss = []
    train_acc = []
    val_acc = []

    for epoch in range(args.epochs):
        # =========================train=======================
        model.train()
        correct, mean_loss = run_epoch(train_dataloader, training=True)
        # max(..., 1) avoids a division by zero on an empty dataset.
        accuracy = 100 * correct / max(len(train_dataset), 1)
        train_epochs_loss.append(mean_loss)
        train_acc.append(accuracy)
        print("train acc = {:.3f}%, loss = {}".format(accuracy, mean_loss))

        # =========================val=========================
        model.eval()
        with torch.no_grad():
            correct, mean_loss = run_epoch(val_dataloader, training=False)
        accuracy = 100 * correct / max(len(val_dataset), 1)
        valid_epochs_loss.append(mean_loss)
        val_acc.append(accuracy)
        print("epoch = {}, valid acc = {:.2f}%, loss = {}".format(epoch, accuracy, mean_loss))



In [None]:
# train()

In [None]:
#     # =========================plot==========================
#     plt.figure(figsize=(12, 4))
#     plt.subplot(121)
#     plt.plot(train_epochs_loss[:])
#     plt.title("train_loss")
#     plt.subplot(122)
#     plt.plot(train_epochs_loss, '-o', label="train_loss")
#     plt.plot(valid_epochs_loss, '-o', label="valid_loss")
#     plt.title("epochs_loss")
#     plt.legend()
#     plt.show()
#     # =========================save model=====================
#     torch.save(model.state_dict(), 'model.pth')


# def pred(val):
#     model = Net(1, 32, 16, 2)
#     model.load_state_dict(torch.load('model.pth'))
#     model.eval()
#     val = torch.tensor(val).reshape(1, -1).float()
#     # 需要转换成相应的输入shape，而且得带上batch_size，因此转换成shape=(1,1)这样的形状
#     res = model(val)
#     # real: tensor([[-5.2095, -0.9326]], grad_fn=<AddmmBackward0>) 需要找到最大值所在的列数，就是标签
#     res = res.max(axis=1)[1].item()
#     print("predicted label is {}, {} {} 8".format(res, val.item(), ('>' if res == 1 else '<')))