In [1]:
import sys
from PIL import Image
import numpy as np
import csv
import torch
from torch import nn
from torch.autograd import Variable
import cv2
import datetime
import math

LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
batsize=10
num_epochs=65
def toonehot(text):
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(34)]
        num = LETTERSTR.find(letter)
        onehot[num] = 1
        labellist.extend(onehot)  #every label size is 1*170, according with LETTERSTR
#         labellist.append(onehot)
    return labellist

In [2]:
def vgg_block(num_convs, in_channels, out_channels):
    net = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(True)]

    for i in range(num_convs - 1):  # 定义后面的许多层
        net.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        net.append(nn.ReLU(True))
    net.append(nn.BatchNorm2d(out_channels))
    net.append(nn.MaxPool2d(2, 2))  # 定义池化层
    net.append(nn.Dropout(0.3))
    return nn.Sequential(*net)


# 将模型打印出来看一下结构
block_demo = vgg_block(3, 3,256)
# print(block_demo)

# #首先定义输入为（1， 64， 300， 300）
# input_demo = Variable(torch.zeros(1,128,7, 25))
# print(input_demo.shape)
# output_demo = block_demo(input_demo)
# print(output_demo.shape)

In [3]:
# 下面我们定义一个函数对这个 vgg block 进行堆叠
def vgg_stack(num_convs, channels):
    net = []
#     print(zip(num_convs, channels))
    for n, c in zip(num_convs, channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n, in_c, out_c))
    return nn.Sequential(*net)


# 作为实例，我们定义一个稍微简单一点的 vgg 结构，其中有 8 个卷积层
vgg_net = vgg_stack((2, 2, 2, 1), ((3, 32), (32, 64), (64, 128), (128, 256)))
# print(vgg_net)

#我们可以看到网络结构中有个 5 个 最大池化，说明图片的大小会减少 5 倍，我们可以验证一下，输入一张 256 x 256
#的图片看看结果是什么
test_x = Variable(torch.zeros(1, 3, 60, 200))
test_y = vgg_net(test_x)
# print(test_y.shape)

In [4]:
class vgg(nn.Module):
    def __init__(self):
        super(vgg, self).__init__()
        self.feature = vgg_net
        self.fc = nn.Sequential(
            nn.Linear(3*12*256, 2560),
            nn.Dropout(0.3),
            nn.Linear(2560, 170)# 34*5=170
        )

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

    # 然后我们可以训练我们的模型看看在 cifar10 上的效果
    def data_tf(x):
        x = np.array(x, dtype='float32') / 255
        x = (x - 0.5) / 0.5
        x = x.transpose((2, 0, 1))  ## 将 channel 放到第一维，只是 pytorch 要求的输入方式
        x = torch.from_numpy(x)
        return x
    
net = vgg()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
criterion = nn.MultiLabelSoftMarginLoss()
# print(net)

In [5]:
print("Reading training data...")
traincsv = open('data/5_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8')
train_data = np.stack([np.array(cv2.imread("data/5_imitate_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)])
train_data = train_data.transpose(0,3,1,2)
traincsv = open('data/5_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8')    
train_label = [toonehot(row[1]) for row in csv.reader(traincsv)]
# print(train_label[0:2])
print("Shape of train data:", train_data.shape)
# print("Shape of train label:", len(train_label))

print("Reading validation data...")
valicsv = open('data/5_imitate_vali_set/captcha_vali.csv', 'r', encoding = 'utf8')
vali_data = np.stack([np.array(Image.open("./data/5_imitate_vali_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(valicsv)])
vali_data = vali_data.transpose(0,3,1,2)
valicsv = open('data/5_imitate_vali_set/captcha_vali.csv', 'r', encoding = 'utf8')
vali_label = [toonehot(row[1]) for row in csv.reader(valicsv)]
# vali_label = [[] for _ in range(5)]
# for arr in read_label:
#     for index in range(5):
#         vali_label[index].append(arr[index])
# vali_label = [arr for arr in np.asarray(vali_label)]
print("Shape of validation data:", vali_data.shape)
# print("Shape of validation label:", vali_label[0].shape)


Reading training data...
Shape of train data: (9999, 3, 60, 200)
Reading validation data...
Shape of validation data: (200, 3, 60, 200)


In [6]:
def get_acc(output, label):
    correct_num =0
    for i in range(output.size()[0]):
        c0 = np.argmax(output[i, 0:34].data.cpu().numpy())
        c1 = np.argmax(output[i, 34:68].data.cpu().numpy())
        c2 = np.argmax(output[i, 68:102].data.cpu().numpy())
        c3 = np.argmax(output[i, 102:136].data.cpu().numpy())
        c4 = np.argmax(output[i, 136:170].data.cpu().numpy())
        c = '%s/%s/%s/%s/%s' % (c0, c1, c2, c3,c4)
        l0 = np.argmax(label[i, 0:34].data.cpu().numpy())
        l1 = np.argmax(label[i, 34:68].data.cpu().numpy())
        l2 = np.argmax(label[i, 68:102].data.cpu().numpy())
        l3 = np.argmax(label[i, 102:136].data.cpu().numpy())
        l4 = np.argmax(label[i, 136:170].data.cpu().numpy())
        l = '%s/%s/%s/%s/%s' % (l0, l1, l2, l3,l4)
        if l==c:
            correct_num += 1        
    return float(correct_num)/ len(output)

In [7]:
if torch.cuda.is_available():
    net = net.cuda()
prev_time = datetime.datetime.now()
iters = int(math.ceil(train_data.shape[0]/batsize)) 
for epoch in range(num_epochs):
    train_loss = 0
    train_acc = 0
    net = net.train()              
    for i in range(iters):
        temp_acc = 0
        im = train_data[i*batsize: (i+1)*batsize]
        im = torch.tensor(im).float()
        label = train_label[i*batsize: (i+1)*batsize]
        label = torch.tensor(label).float()
        if torch.cuda.is_available():                    
            im = Variable(im.cuda())
            label = Variable(label.cuda())
        else:
            im = Variable(im)
            label = Variable(label)
        # forward
        output = net(im)
#         print(output.shape)
#         print(label.shape)
        loss = criterion(output, label)
        # forward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
#         print(output.shape)
#         print(label.shape)
        train_acc += get_acc(output, label)
        
    cur_time = datetime.datetime.now()
    h, remainder = divmod((cur_time - prev_time).seconds, 3600)
    m, s = divmod(remainder, 60)
    time_str = "Time %02d:%02d:%02d" % (h, m, s)
    prev_time = cur_time
    epoch_str = ("Epoch %4d. Train Loss: %f, Train Acc: %f, " %
                     (epoch, train_loss / len(train_data),
                      train_acc / iters))
    print(epoch_str + time_str)

Epoch    0. Train Loss: 0.015149, Train Acc: 0.000000, Time 00:01:44
Epoch    1. Train Loss: 0.011305, Train Acc: 0.000200, Time 00:01:43
Epoch    2. Train Loss: 0.008697, Train Acc: 0.014511, Time 00:01:43
Epoch    3. Train Loss: 0.005528, Train Acc: 0.180211, Time 00:01:43
Epoch    4. Train Loss: 0.003443, Train Acc: 0.468578, Time 00:01:43
Epoch    5. Train Loss: 0.002295, Train Acc: 0.659078, Time 00:01:43
Epoch    6. Train Loss: 0.001760, Train Acc: 0.741778, Time 00:01:43
Epoch    7. Train Loss: 0.001321, Train Acc: 0.822800, Time 00:01:43
Epoch    8. Train Loss: 0.001129, Train Acc: 0.864900, Time 00:01:43
Epoch    9. Train Loss: 0.001151, Train Acc: 0.849700, Time 00:01:43
Epoch   10. Train Loss: 0.000889, Train Acc: 0.895100, Time 00:01:43
Epoch   11. Train Loss: 0.000843, Train Acc: 0.899800, Time 00:01:43
Epoch   12. Train Loss: 0.000868, Train Acc: 0.906300, Time 00:01:43
Epoch   13. Train Loss: 0.000712, Train Acc: 0.926189, Time 00:01:42
Epoch   14. Train Loss: 0.000694, 

In [8]:
torch.save(net,'./models/model')

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [9]:
net = torch.load('./models/model')

In [10]:
def get_vali_acc(output, label):
    correct_num =0
    for i in range(output.size()[0]):
        c0 = np.argmax(output[i, 0:34].data.cpu().numpy())
        c1 = np.argmax(output[i, 34:68].data.cpu().numpy())
        c2 = np.argmax(output[i, 68:102].data.cpu().numpy())
        c3 = np.argmax(output[i, 102:136].data.cpu().numpy())
        c4 = np.argmax(output[i, 136:170].data.cpu().numpy())
        c = '%s/%s/%s/%s/%s' % (c0, c1, c2, c3,c4)
        l0 = np.argmax(label[i, 0:34].data.cpu().numpy())
        l1 = np.argmax(label[i, 34:68].data.cpu().numpy())
        l2 = np.argmax(label[i, 68:102].data.cpu().numpy())
        l3 = np.argmax(label[i, 102:136].data.cpu().numpy())
        l4 = np.argmax(label[i, 136:170].data.cpu().numpy())
        l = '%s/%s/%s/%s/%s' % (l0, l1, l2, l3,l4)
        print(c,l)
        if l==c:
            correct_num += 1        
    return float(correct_num)/ len(output)

In [11]:
net.eval()
vali_acc = 0
for i in range(int(math.ceil(len(vali_data)/batsize))):
    im = vali_data[i*batsize: (i+1)*batsize]
    im = torch.tensor(im).float()
    label = vali_label[i*batsize: (i+1)*batsize]
    label = torch.tensor(label).float()
    if torch.cuda.is_available():                    
        im = Variable(im.cuda())
        label = Variable(label.cuda())
    else:
        im = Variable(im)
        label = Variable(label)
    output = net(im)
    vali_acc += get_vali_acc(output, label)

6/29/1/28/6 6/29/1/28/6
6/20/28/31/4 6/20/28/31/4
27/31/2/11/7 27/31/2/11/7
20/7/26/14/18 20/7/26/14/18
17/11/2/11/8 17/11/2/11/8
3/27/32/6/0 3/27/29/6/0
24/21/21/19/11 24/21/21/19/11
33/19/19/10/21 33/19/19/10/21
20/30/16/7/10 20/30/16/7/10
14/3/0/12/25 14/3/0/12/25
22/2/1/2/27 22/2/1/2/27
7/6/32/3/20 7/6/32/3/20
0/23/22/6/13 0/23/22/6/13
23/10/16/30/17 23/10/16/30/17
7/20/15/6/14 7/20/15/6/14
5/25/17/18/24 5/25/17/18/24
29/15/8/20/20 29/15/8/20/20
31/21/15/10/17 31/21/15/10/17
21/16/24/27/5 21/16/24/27/5
16/1/11/11/27 16/1/11/11/27
28/21/9/17/31 28/21/9/17/31
18/27/10/14/28 18/27/10/14/28
10/16/7/30/8 10/16/7/30/8
30/13/4/31/10 30/13/4/31/10
3/8/21/29/22 3/8/21/29/22
8/22/29/11/22 8/22/29/11/22
32/5/12/13/23 32/5/12/13/23
4/10/11/9/23 4/10/11/9/23
31/33/23/0/25 31/33/23/0/25
19/4/7/1/31 19/4/7/1/31
19/12/10/31/15 19/12/10/31/15
9/6/24/5/29 9/6/24/5/29
17/26/30/23/28 17/26/30/23/28
33/24/16/23/9 33/24/16/23/9
25/32/21/2/22 25/32/21/2/22
14/3/12/7/7 14/3/12/7/29
19/7/29/12/6 19/7/29/12

In [12]:
vali_acc = vali_acc/int(math.ceil(len(vali_data)/batsize))
epoch_str = ("Vali Acc: %f"%vali_acc)
print(epoch_str)

Vali Acc: 0.930000
