In [19]:
import numpy as np
import torch

PATH_images = "./cropped/"
BATCH_SIZE = 64
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LETTER_BOX = ["BLANK", '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
 ' ', '$', '%', '&', "'", '(', ')', '+', ',', '-', '.', '_',
 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',]

import json, os
_,_,image_list = next(os.walk(PATH_images))
image_list.sort()
image_list_train = image_list[:1150000]
image_list_test = image_list[1150000:]

In [15]:
from torch.utils.data import Dataset,DataLoader
from torchvision.transforms import PILToTensor
from torch.optim import Adam
from PIL import Image
from torch import nn

In [16]:
class DatasetWDT(Dataset):
    def __init__(self, image_list):
        self.image_list = image_list
        self.length = len(self.image_list)
        self.pil2tensor = PILToTensor()
    def __len__(self):
        return self.length
    def __getitem__(self, item):
        listt = []
        img, label = self._getitem(item)
        listt.append((img,label))
        width, height = img.size
        while len(listt)<BATCH_SIZE:
            item = item+1 if item+1<self.length else 0
            img, label = self._getitem(item)
            if width-16<img.size[0]<width+16:
                img = img.resize((width, height))
                listt.append((img,label))
        imgs = torch.cat([self.pil2tensor(img)[None] for img,_ in listt], dim=0)/256
        targets = torch.cat([self._makelabel(label) for _,label in listt], dim=0)
        target_lengths = torch.tensor([len(i[1]) for i in listt])
        return imgs, targets, target_lengths
    def _getitem(self, item):
        img = Image.open(PATH_images+self.image_list[item])
        img = self._resize(img)
        label = self.image_list[item].split("_")[-2]
        return img, label
    def _resize(self, img, height=32):
        width = int(img.size[0]/img.size[1]*height+0.5)
        img = img.resize((width,height))
        return img
    def _makelabel(self, label):
        numbers = [LETTER_BOX.index(i) for i in label]
        numbers = torch.tensor(numbers)
        return numbers

dataset = DatasetWDT(image_list_train)
dataloader = DataLoader(dataset, num_workers=0, shuffle=True, collate_fn=lambda x:x[0])

In [17]:
class BidirectionalLSTM(nn.Module):
    def __init__(self, nInput_size, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
        self.lstm = nn.LSTM(nInput_size, nHidden, bidirectional=True)
        self.linear = nn.Linear(nHidden*2, nOut)
    def forward(self, input):
        recurrent, (hidden,cell) = self.lstm(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)
        output = self.linear(t_rec)  # [T*b, nOut]
        output = output.view(T, b, -1) # [seq,batch,nClass]
        return output
class CNN(nn.Module):
    def __init__(self, imageHeight, nChannel):
        super().__init__()
        # 0
        self.depth_conv0 = nn.Conv2d(nChannel,nChannel,3,1,1,groups=nChannel)
        self.point_conv0 = nn.Conv2d(nChannel,64,3,1,1,groups=1)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2,stride=2)
        # 1
        self.depth_conv1 = nn.Conv2d(64,64,3,1,1,groups=64)
        self.point_conv1 = nn.Conv2d(64,128,1,1,0,groups=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2)
        # 2
        self.depth_conv2 = nn.Conv2d(128,128,3,1,1,groups=128)
        self.point_conv2 = nn.Conv2d(128,256,1,1,0,groups=1)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.pool2 = nn.MaxPool2d(kernel_size=2,stride=2)
        self.relu2 = nn.ReLU(inplace=True)
        # 3
        self.depth_conv3 = nn.Conv2d(256, 256, 3, 1, 1, groups=256)
        self.point_conv3 = nn.Conv2d(256, 256, 1, 1, 0, groups=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2,2),stride=(2,1),padding=(0,1))
        # 4
        self.depth_conv4 = nn.Conv2d(256, 256, 3, 1, 1, groups=256)
        self.point_conv4 = nn.Conv2d(256, 512, 1, 1, 0, groups=1)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)
        # 5
        self.depth_conv5 = nn.Conv2d(512, 512, 3, 1, 1, groups=512)
        self.point_conv5 = nn.Conv2d(512, 512, 1, 1, 0, groups=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=(2,2),stride=(2,1),padding=(0,1))
        # 6
        self.depth_conv6 = nn.Conv2d(512, 512, 2, 1, 0, groups=512)
        self.point_conv6 = nn.Conv2d(512, 512, 1, 1, 0, groups=1)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6= nn.ReLU(inplace=True)
    def forward(self,input):
        depth0 = self.depth_conv0(input)
        point0 = self.point_conv0(depth0)
        relu0 = self.relu0(point0)
        pool0 = self.pool0(relu0)
        # print(pool0.size())
        depth1 = self.depth_conv1(pool0)
        point1 = self.point_conv1(depth1)
        relu1 = self.relu1(point1)
        pool1 = self.pool1(relu1)
        # print(pool1.size())
        depth2 = self.depth_conv2(pool1)
        point2 = self.point_conv2(depth2)
        batchNormal2 = self.batchNorm2(point2)
        relu2 = self.relu2(batchNormal2)
        pool2 = self.pool2(relu2)
        #print(relu2.size())
        depth3 = self.depth_conv3(pool2)
        point3 = self.point_conv3(depth3)
        relu3 = self.relu3(point3)
        pool3 = self.pool3(relu3)
        #print(pool3.size())
        depth4 = self.depth_conv4(pool3)
        point4 = self.point_conv4(depth4)
        batchNormal4 = self.batchNorm4(point4)
        relu4 = self.relu4(batchNormal4)
        #print(relu4.size())
        depth5 = self.depth_conv5(relu4)
        point5 = self.point_conv5(depth5)
        relu5 = self.relu5(point5)
        pool5 = self.pool5(relu5)
        #print(pool5.size())
        depth6 = self.depth_conv6(pool5)
        point6 = self.point_conv6(depth6)
        batchNormal6 = self.batchNorm6(point6)
        relu6 = self.relu6(batchNormal6)
        #print(relu6.size())
        return relu6
class CRNN(nn.Module):
    def __init__(self,imgHeight, nChannel, nClass, nHidden):
        super().__init__()
        self.cnn = nn.Sequential(CNN(imgHeight, nChannel))
        self.lstm = nn.Sequential(
            BidirectionalLSTM(512, nHidden, nHidden),
            BidirectionalLSTM(nHidden, nHidden, nClass) )
    def forward(self,input):
        conv = self.cnn(input)
        conv = conv.squeeze(dim=2) # BCHW->BCW
        conv = conv.permute(2,0,1) # BCW->WBC
        output = self.lstm(conv)
        return output

from crnn_pytorch_master.models.crnn import CRNN as CRNN2

model = CRNN2(32,3,75,256).to(DEVICE)

In [18]:
ctc_loss = nn.CTCLoss(blank=0, reduction='mean')
optimizer = Adam(params=model.parameters(), lr=0.005)

In [13]:
for i,(imgs,targets,target_lengths) in enumerate(dataloader):
    output = model(imgs)
    output = output.log_softmax(2).requires_grad_()
    input_lengths = torch.tensor([output.shape[0]]*output.shape[1])
    loss = ctc_loss(output, targets, input_lengths, target_lengths)
    loss.backward()
    print(loss)

tensor(15.6158, grad_fn=<MeanBackward0>)
tensor(17.7431, grad_fn=<MeanBackward0>)
tensor(18.8357, grad_fn=<MeanBackward0>)
tensor(17.9409, grad_fn=<MeanBackward0>)
tensor(13.0935, grad_fn=<MeanBackward0>)
tensor(15.8836, grad_fn=<MeanBackward0>)
tensor(15.3290, grad_fn=<MeanBackward0>)
tensor(15.5376, grad_fn=<MeanBackward0>)
tensor(26.8936, grad_fn=<MeanBackward0>)
tensor(17.4084, grad_fn=<MeanBackward0>)
tensor(16.7651, grad_fn=<MeanBackward0>)
tensor(29.8160, grad_fn=<MeanBackward0>)
tensor(14.7432, grad_fn=<MeanBackward0>)
tensor(12.5799, grad_fn=<MeanBackward0>)
tensor(17.2561, grad_fn=<MeanBackward0>)
tensor(22.2359, grad_fn=<MeanBackward0>)
tensor(12.2951, grad_fn=<MeanBackward0>)
tensor(23.6368, grad_fn=<MeanBackward0>)
tensor(15.3916, grad_fn=<MeanBackward0>)
tensor(13.9096, grad_fn=<MeanBackward0>)
tensor(25.9034, grad_fn=<MeanBackward0>)
tensor(18.9957, grad_fn=<MeanBackward0>)
tensor(15.8345, grad_fn=<MeanBackward0>)
tensor(24.4262, grad_fn=<MeanBackward0>)
tensor(18.0715, 


KeyboardInterrupt



In [10]:
import os
import torch
import cv2
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader
from crnn_new import crnn
import time

class resizeAndNormalize():
    def __init__(self,size,interpolation=cv2.INTER_LINEAR):
        self.size = size
        self.interpolation = interpolation
        self.toTensor = transforms.ToTensor()

    def __call__(self, image):
        image = cv2.resize(image,self.size,interpolation=self.interpolation)
        image = self.toTensor(image)
        image = image.sub_(0.5).div_(0.5)
        return image

class CRNNDataSet(Dataset):
    def __init__(self,imageRoot,labelRoot):
        self.image_root = imageRoot
        self.image_dict = self.readfile(labelRoot)
        self.image_name = [fileName for fileName,_ in self.image_dict.items()]

    def __getitem__(self, index):
        image_path = os.path.join(self.image_root,self.image_name[index])
        keys = self.image_dict.get(self.image_name[index])
        label = [int(x) for x in keys]

        image = cv2.imread(image_path,cv2.IMREAD_GRAYSCALE)
        (height,width) = image.shape

        size_height = 32
        ratio = 32/float(height)
        size_width = int(ratio * width)
        transform = resizeAndNormalize((size_width,size_height))
        image = transform(image)
        label = torch.IntTensor(label)

        return image,label

    def __len__(self):
        return len(self.image_name)

    def readfile(self,fileName):
        res = []
        with open(fileName, 'r') as f:
            lines = f.readlines()
            for line in lines:
                res.append(line.strip())
        dic = {}
        total = 0
        for line in res:
            part = line.split(' ')
            if  not os.path.exists(os.path.join(self.image_root, part[0])):
                print(os.path.join(self.image_root, part[0]))
                total += 1
            else:
                dic[part[0]] = part[1:]
        print(total)

        return dic

trainData = CRNNDataSet(imageRoot="D:\BaiduNetdiskDownload\Synthetic_Chinese_String_Dataset\images\\",
                          labelRoot="D:\BaiduNetdiskDownload\Synthetic_Chinese_String_Dataset\lables\data.txt")

trainLoader = DataLoader(dataset=trainData,batch_size=30,shuffle=True,num_workers=0)

valData = CRNNDataSet(imageRoot="D:\BaiduNetdiskDownload\Synthetic_Chinese_String_Dataset\images\\",
                          labelRoot="D:\BaiduNetdiskDownload\Synthetic_Chinese_String_Dataset\lables\data_t.txt")

valLoader = DataLoader(dataset=valData,batch_size=1,shuffle=True,num_workers=1)

def decode(preds):
    pred = []
    for i in range(len(preds)):
        if preds[i] != 5989 and ((i == 5989) or (i != 5989 and preds[i] != preds[i-1])):
            pred.append(int(preds[i]))
    return pred


def val(model, loss_function, max_iteration,use_gpu=True):
    model.eval()
    k = 0
    totalloss = 0
    correct_num = 0
    total_num = 0
    val_iter = iter(valLoader)
    max_iter = min(max_iteration,len(valLoader))

    for i in range(max_iter):
        k = k + 1
        data,label = val_iter.next()
        labels = torch.IntTensor([])
        for j in range(label.size(0)):
            labels = torch.cat((labels,label[j]),0)

        if torch.cuda.is_available() and use_gpu:
            data = data.cuda()
        output = model(data)
        input_lengths = torch.IntTensor([output.size(0)] * int(output.size(1)))
        target_lengths = torch.IntTensor([label.size(1)] * int(label.size(0)))
        loss = loss_function(output,labels,input_lengths,target_lengths) /  label.size(0)
        totalloss += float(loss)
        pred_label = output.max(2)[1]
        pred_label = pred_label.transpose(1,0).contiguous().view(-1)
        pred = decode(pred_label)
        total_num += len(pred)
        for x,y in zip(pred,labels):
            if int(x) == int(y):
                correct_num += 1
    accuracy = correct_num / float(total_num) * 100
    test_loss = totalloss / k
    print('Test loss : %.3f , accuary : %.3f%%' % (test_loss, accuracy))


def train():
    use_gpu = True
    learning_rate = 0.0005
    weight_decay = 1e-4
    max_epoch = 10
    modelpath = 'F:\crnn_model\pytorch-crnn.pth'

    char_set = open('../train/char_std_5990.txt','r',encoding='utf-8').readlines()
    char_set = ''.join([ch.strip('\n') for ch in char_set[1:]] +['卍'])
    n_class = len(char_set)

    model = crnn.CRNN(imgHeight=32,nChannel=1,nClass=n_class,nHidden=256)
    if torch.cuda.is_available() and use_gpu:
        model.cuda()

    loss_func = torch.nn.CTCLoss(blank=n_class-1)
    optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

    if os.path.exists(modelpath):
        print("load model from %s" % modelpath)
        model.load_state_dict(torch.load(modelpath))
        print("done!")

    lossTotal = 0.0
    k = 0
    printInterval = 100
    valinterval = 1000
    start_time = time.time()
    for epoch in range(max_epoch):

        for i,(data,label) in enumerate(trainLoader):

            k = k + 1
            model.train()

            labels = torch.IntTensor([])
            for j in range(label.size(0)):
                labels = torch.cat((labels,label[j]),0)

            if torch.cuda.is_available and use_gpu:
                data = data.cuda()
                loss_func = loss_func.cuda()
                labels = labels.cuda()

            output = model(data)


            #log_probs = output
            log_probs = output.log_softmax(2).detach().requires_grad_()
            targets = labels
            input_lengths = torch.IntTensor([output.size(0)] * int(output.size(1)))
            target_lengths = torch.IntTensor([label.size(1)] * int(label.size(0)))

            #forward(self, log_probs, targets, input_lengths, target_lengths)
            loss = loss_func(log_probs,targets,input_lengths,target_lengths) / label.size(0)
            lossTotal += float(loss)

            if k % printInterval == 0:

                print("[%d/%d] [%d/%d] loss:%f" % (
                epoch, max_epoch, i + 1, len(trainLoader), lossTotal/printInterval))
                lossTotal = 0.0
                torch.save(model.state_dict(), 'F:\crnn_model\pytorch-crnn.pth')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if k % valinterval == 0:
                val(model,loss_func)

    end_time = time.time()
    print("takes {}s".format((end_time - start_time)))


if __name__ == '__main__':
    train()


{'axes': {'x-axis': {'tick-type': 'markers',
                     'ticks': [{'id': 10, 'tick_pt': {'x': 103, 'y': 190}},
                               {'id': 11, 'tick_pt': {'x': 142, 'y': 190}},
                               {'id': 12, 'tick_pt': {'x': 177, 'y': 190}},
                               {'id': 13, 'tick_pt': {'x': 215, 'y': 190}},
                               {'id': 14, 'tick_pt': {'x': 252, 'y': 190}},
                               {'id': 15, 'tick_pt': {'x': 291, 'y': 190}},
                               {'id': 16, 'tick_pt': {'x': 327, 'y': 190}},
                               {'id': 17, 'tick_pt': {'x': 367, 'y': 190}},
                               {'id': 18, 'tick_pt': {'x': 403, 'y': 190}},
                               {'id': 19, 'tick_pt': {'x': 441, 'y': 190}}],
                     'values-type': 'categorical'},
          'y-axis': {'tick-type': 'markers',
                     'ticks': [{'id': 3, 'tick_pt': {'x': 83, 'y': 53}},
                        