In [1]:
import torch.nn as nn
import torch
import torch.optim as optim
import torch.utils.data as Data
from torch.utils.data import Dataset
import collections


from torch.autograd import Variable
import pandas as pd
import numpy as np
import cv2
import os
from matplotlib import pyplot as plt
from warpctc_pytorch import CTCLoss

In [2]:
class strLabelConverter(object):
    """Convert between str and label.

    NOTE:
        Insert `blank` to the alphabet for CTC.

    Args:
        alphabet (str): set of the possible characters.
        ignore_case (bool, default=True): whether or not to ignore all of the case.
    """

    def __init__(self, alphabet, ignore_case=True):
        self._ignore_case = ignore_case
        if self._ignore_case:
            alphabet = alphabet.lower()
        self.alphabet = alphabet + '-'  # for `-1` index

        self.dict = {}
        for i, char in enumerate(alphabet):
            # NOTE: 0 is reserved for 'blank' required by wrap_ctc
            self.dict[char] = i + 1

    def encode(self, text):
        """Support batch or single str.

        Args:
            text (str or list of str): texts to convert.

        Returns:
            torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts.
            torch.IntTensor [n]: length of each text.
        """
        if isinstance(text, str):
            text = [
                self.dict[char.lower() if self._ignore_case else char]
                for char in text
            ]
            length = [len(text)]
        elif isinstance(text, collections.Iterable):
            length = [len(s) for s in text]
            text = ''.join(text)
            text, _ = self.encode(text)
        return (torch.IntTensor(text), torch.IntTensor(length))

    def decode(self, t, length, raw=False):
        """Decode encoded texts back into strs.

        Args:
            torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts.
            torch.IntTensor [n]: length of each text.

        Raises:
            AssertionError: when the texts and its length does not match.

        Returns:
            text (str or list of str): texts to convert.
        """
        if length.numel() == 1:
            length = length[0]
            assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length)
            if raw:
                return ''.join([self.alphabet[i - 1] for i in t])
            else:
                char_list = []
                for i in range(length):
                    if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
                        char_list.append(self.alphabet[t[i] - 1])
                return ''.join(char_list)
        else:
            # batch mode
            assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum())
            texts = []
            index = 0
            for i in range(length.numel()):
                l = length[i]
                texts.append(
                    self.decode(
                        t[index:index + l], torch.IntTensor([l]), raw=raw))
                index += l
            return texts

In [3]:
class averager(object):
    """Compute average for `torch.Variable` and `torch.Tensor`. """

    def __init__(self):
        self.reset()

    def add(self, v):
        if isinstance(v, Variable):
            count = v.data.numel()
            v = v.data.sum()
        elif isinstance(v, torch.Tensor):
            count = v.numel()
            v = v.sum()

        self.n_count += count
        self.sum += v

    def reset(self):
        self.n_count = 0
        self.sum = 0

    def val(self):
        res = 0
        if self.n_count != 0:
            res = self.sum / float(self.n_count)
        return res

In [4]:
class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output

In [5]:
class CRNN(nn.Module):

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, )
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6)  # 512x1x16

        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        # conv features
        conv = self.cnn(input)
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]

        # rnn features
        output = self.rnn(conv)

        return output

In [6]:
model = CRNN(32, 1, 19, 256)

In [7]:
model

CRNN(
  (cnn): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): ReLU(inplace)
    (pooling0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace)
    (pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace)
    (pooling2): MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
    (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu4): ReLU(inplace)
    (conv5): Conv2d(512, 512, kernel_size=(3, 3), 

In [8]:
alphabet = '零壹貳參肆伍陸柒捌玖拾佰仟萬億兆元整'

In [67]:
nclass = len(alphabet) + 1
nc = 1
converter = strLabelConverter(alphabet)

batchSize=2

In [68]:
'''
image = torch.FloatTensor(batchSize, 1, 32, 32)
text = torch.IntTensor(batchSize * 5)
length = torch.IntTensor(batchSize)
image = Variable(image)
text = Variable(text)
length = Variable(length)
'''

'\nimage = torch.FloatTensor(batchSize, 1, 32, 32)\ntext = torch.IntTensor(batchSize * 5)\nlength = torch.IntTensor(batchSize)\nimage = Variable(image)\ntext = Variable(text)\nlength = Variable(length)\n'

In [69]:
data = []
for g in os.listdir('/Users/chienan/Job/CNN/cut/'):
    data.append(plt.imread('./cut/%s'%(g)))
data = data[:10]

In [70]:
# loss averager
loss_avg = averager()

optimizer = optim.Adam(model.parameters(), lr=0.01126,betas=(0.5, 0.999))

In [71]:
y_train = ["壹萬元整","陸萬元整","參拾貳萬肆仟元整","伍萬元整","壹萬元整","參仟肆佰陸拾肆元整","參仟伍佰壹拾參元整","參仟肆佰貳拾元整","壹萬元整","貳拾參萬陸仟元整"]

In [72]:
class Data(Dataset):
    def __init__(self, data=None, label=None):
        self.data = data
        self.label = label
        self.nSamples = len(data)


    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        assert index <= len(self), 'index range error'
        img = self.data[index]
        label = self.label[index]

        return (img, label)

In [73]:
class alignCollate(object):

    def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1):
        self.imgH = imgH
        self.imgW = imgW

    def __call__(self, batch):
        images, labels = zip(*batch)

        return images, labels

In [74]:
X = np.array([i.reshape(1,32,100) for i in data])/255

In [75]:
X = torch.FloatTensor(X)
#y = torch.IntTensor(y)
X = Variable(X)
#y = Variable(y)
#torch_dataset = Data.TensorDataset(X,y)

In [76]:
train_dataset = Data(data=X,label=y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batchSize,shuffle=True,num_workers=1)
                                           #,
                                          #collate_fn=alignCollate)

In [77]:
criterion = CTCLoss()

In [78]:
# loss averager
loss_avg = averager()

In [79]:
optimizer = optim.Adam(model.parameters(), lr=0.01126,betas=(0.5, 0.999))

In [80]:
imgH=32
image = torch.FloatTensor(batchSize, 3, imgH, imgH)
text = torch.IntTensor(batchSize * 5)
length = torch.IntTensor(batchSize)
image = Variable(image)
text = Variable(text)
length = Variable(length)

In [81]:
preds = model(cpu_images)
preds_size = Variable(torch.IntTensor([batch_size] * batch_size))

In [82]:
total_iteration = 10
for iteration in range(total_iteration):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in model.parameters():
            p.requires_grad = True
        model.train()
        data = train_iter.next()
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        preds = model(cpu_images)
        preds_size = Variable(torch.IntTensor([batch_size] * batch_size))
        t, l = converter.encode(cpu_texts)
        length.data.resize_(l.size()).copy_(l)
        text.data.resize_(t.size()).copy_(t)
        cost = criterion(preds, text, preds_size, length) / batch_size
        model.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        i += 1
        
        
        

        if i % 1 == 0:
            print('[iteration%d/%d][%d/%d] Loss: %f' %
                  (iteration+1, total_iteration, i, len(train_loader), loss_avg.val() ))
            loss_avg.reset()

[iteration1/10][1/5] Loss: 0.000000
[iteration1/10][2/5] Loss: 0.000000
[iteration1/10][3/5] Loss: 0.000000
[iteration1/10][4/5] Loss: 0.000000
[iteration1/10][5/5] Loss: 0.000000
[iteration2/10][1/5] Loss: 0.000000
[iteration2/10][2/5] Loss: 0.000000
[iteration2/10][3/5] Loss: 0.000000
[iteration2/10][4/5] Loss: 0.000000
[iteration2/10][5/5] Loss: 0.000000
[iteration3/10][1/5] Loss: 0.000000
[iteration3/10][2/5] Loss: 0.000000
[iteration3/10][3/5] Loss: 0.000000
[iteration3/10][4/5] Loss: 0.000000
[iteration3/10][5/5] Loss: 0.000000
[iteration4/10][1/5] Loss: 0.000000
[iteration4/10][2/5] Loss: 0.000000
[iteration4/10][3/5] Loss: 0.000000
[iteration4/10][4/5] Loss: 0.000000
[iteration4/10][5/5] Loss: 0.000000
[iteration5/10][1/5] Loss: 0.000000
[iteration5/10][2/5] Loss: 0.000000
[iteration5/10][3/5] Loss: 0.000000
[iteration5/10][4/5] Loss: 0.000000
[iteration5/10][5/5] Loss: 0.000000
[iteration6/10][1/5] Loss: 0.000000
[iteration6/10][2/5] Loss: 0.000000
[iteration6/10][3/5] Loss: 0

In [83]:
criterion(preds, text, preds_size, length) / batch_size

tensor([0.], grad_fn=<DivBackward0>)

In [58]:
text

tensor([ 4, 11,  3, 14,  5, 13, 17, 18], dtype=torch.int32)

In [66]:
preds.shape

torch.Size([26, 1, 19])

In [61]:
preds_size

tensor([1], dtype=torch.int32)

In [62]:
length

tensor([8], dtype=torch.int32)

In [63]:
t

tensor([ 4, 11,  3, 14,  5, 13, 17, 18], dtype=torch.int32)

In [64]:
cpu_texts

('參拾貳萬肆仟元整',)