In [1]:
import argparse
import random
import mxnet as mx
import mxnet.optimizer as optim
import numpy as np
import os
import utils
import mxnet_dataset as dataset
import models.mxnet_crnn as crnn
from mxnet import nd, autograd, gluon

In [2]:
# Training configuration, expressed as command-line style options.
# Dashes in option names become underscores on the parsed namespace
# (e.g. --display-interval -> opt.display_interval).
parser = argparse.ArgumentParser()
parser.add_argument('--train-root', required=True, help='path to the training dataset')
parser.add_argument('--val-root', required=True, help='path to the validation dataset')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
parser.add_argument('--batch-size', type=int, default=64, help='input batch size')
parser.add_argument('--image-height', type=int, default=32, help='the height of the input image to network')
parser.add_argument('--image-width', type=int, default=100, help='the width of the input image to network')
parser.add_argument('--nh', type=int, default=256, help='size of the lstm hidden state')
parser.add_argument('--nepoch', type=int, default=25, help='number of epochs to train for')
# TODO(meijieru): epoch -> iter
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--pretrained', default='', help="path to pretrained model (to continue training)")
parser.add_argument('--alphabet', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz')
parser.add_argument('--expr-dir', default='expr', help='Where to store samples and models')
parser.add_argument('--display-interval', type=int, default=500, help='Interval (in iterations) between loss printouts')
parser.add_argument('--n-test-disp', type=int, default=10, help='Number of samples to display when test')
parser.add_argument('--val-interval', type=int, default=500, help='Interval (in iterations) between validation runs')
parser.add_argument('--save-interval', type=int, default=500, help='Interval (in iterations) between checkpoints')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate, not used by adadelta')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--adam', action='store_true', help='Whether to use adam (default is rmsprop)')
parser.add_argument('--adadelta', action='store_true', help='Whether to use adadelta (default is rmsprop)')
parser.add_argument('--keep-ratio', action='store_true', help='whether to keep ratio for image resize')
parser.add_argument('--manual-seed', type=int, default=1234, help='reproduce experiment')
parser.add_argument('--random-sample', action='store_true', help='whether to sample the dataset with random sampler')
# Equivalent command line:
# --adadelta --train-root D:\tmp\2858 --val-root D:\tmp\2859-6 --cuda --workers 0
# An explicit argument list is passed, so sys.argv is not consulted. The
# dataset paths below are machine-specific and must be edited to run elsewhere.
opt = parser.parse_args(['--adadelta', '--train-root', r'D:\tmp\2858', '--val-root', r'D:\tmp\2859-6', '--cuda', '--workers', '0'])
print(opt)

Namespace(adadelta=True, adam=False, alphabet='0123456789abcdefghijklmnopqrstuvwxyz', batch_size=64, beta1=0.5, cuda=True, display_interval=500, expr_dir='expr', image_height=32, image_width=100, keep_ratio=False, lr=0.01, manual_seed=1234, n_test_disp=10, nepoch=25, ngpu=1, nh=256, pretrained='', random_sample=False, save_interval=500, train_root='D:\\tmp\\2858', val_interval=500, val_root='D:\\tmp\\2859-6', workers=0)


In [3]:
# Short alias for the Gluon CTC loss class; it is instantiated in a later cell.
CTCLoss = mx.gluon.loss.CTCLoss

# Create the experiment output directory on first use.
expr_dir_present = os.path.exists(opt.expr_dir)
if not expr_dir_present:
    os.makedirs(opt.expr_dir)

# Seed every random number source used here with the same configured value.
for seed_fn in (random.seed, np.random.seed, mx.random.seed):
    seed_fn(opt.manual_seed)

# Warn when a GPU is visible to MXNet but the run was not asked to use it.
gpu_count = mx.context.num_gpus()
if gpu_count and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# Device context on which the network and the batch buffers are placed.
ctx = mx.gpu() if opt.cuda else mx.cpu()

In [4]:
# Training data: an LMDB-backed dataset read from --train-root.
train_dataset = dataset.lmdbDataset(root=opt.train_root)
# Fails fast if the dataset object is falsy (presumably an empty dataset).
assert train_dataset

# NOTE(review): the project sampler is used when --random-sample is NOT given,
# which reads as inverted relative to the flag's help text; confirm the intent.
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batch_size, ctx)
else:
    sampler = None

# Gluon's DataLoader does not accept shuffle=True together with a sampler,
# so shuffling is only enabled when no explicit sampler was built.
train_loader = mx.gluon.data.DataLoader(
    train_dataset, batch_size=opt.batch_size,
    shuffle=sampler is None, sampler=sampler,
    num_workers=int(opt.workers),
    batchify_fn=dataset.alignCollate(image_height=opt.image_height, image_width=opt.image_width, keep_ratio=opt.keep_ratio))

# Validation data: resized to the same configured (width, height) as the
# training batches instead of a hard-coded (100, 32), so that overriding
# --image-width / --image-height keeps training and validation consistent.
# The defaults (100, 32) reproduce the previous behaviour.
test_dataset = dataset.lmdbDataset(
    root=opt.val_root,
    transform=dataset.resizeNormalize((opt.image_width, opt.image_height)))

# One output class per alphabet character plus one extra class
# (presumably the CTC blank).
nclass = len(opt.alphabet) + 1
# Number of input image channels passed to the network constructor.
nc = 1

converter = utils.strLabelConverter(opt.alphabet)
criterion = CTCLoss()

In [5]:
# Build the CRNN from the configured image height, channel count, class count
# and LSTM hidden size, then initialise its parameters on the chosen device.
net = crnn.CRNN(opt.image_height, nc, nclass, opt.nh)
net.initialize(mx.init.Xavier(), ctx=ctx)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    # Actually load the checkpoint; previously the message was printed but the
    # weights were never read. Expects a file written by Gluon's
    # `save_parameters`.
    net.load_parameters(opt.pretrained, ctx=ctx)
print(net)

CRNN(
  (cnn): Sequential(
    (conv0): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): Activation(relu)
    (pooling0): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
    (conv1): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): Activation(relu)
    (pooling1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
    (conv2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=None)
    (relu2): Activation(relu)
    (conv3): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): Activation(relu)
    (pooling2): MaxPool2D(size=(2, 2), stride=(2, 1), padding=(0, 1), ceil_mode=False, global_pool=False, pool_type=max, layout=NC

In [6]:
# Pre-allocated device buffers that trainBatch fills via utils.loadData.
# The image buffer matches what the network is built for: `nc` channels and
# image_height x image_width pixels (it was 3 channels and height x height).
image = nd.zeros((opt.batch_size, nc, opt.image_height, opt.image_width), ctx)
# `nd.array` interprets its first argument as data, not as a shape, so the
# previous calls did not allocate buffers of the intended length. `nd.zeros`
# takes a shape tuple.
text = nd.zeros((opt.batch_size * 5,), ctx)
length = nd.zeros((opt.batch_size,), ctx)

# Choose the optimizer: --adam takes precedence over --adadelta, and RMSProp
# is the fallback when neither flag is set. AdaDelta is built with its
# library defaults and therefore ignores --lr.
if opt.adam:
    optimizer = optim.Adam(learning_rate=opt.lr, beta1=opt.beta1)
else:
    optimizer = optim.AdaDelta() if opt.adadelta else optim.RMSProp(learning_rate=opt.lr)

In [7]:
# Trainers created on demand by _get_trainer, keyed by the identity of the
# (net, optimizer) pair. The net and optimizer are stored alongside the
# trainer so their ids stay unique for as long as the entry exists.
_TRAINERS = {}


def _get_trainer(net, optimizer):
    """Return the gluon.Trainer that applies `optimizer` to `net`'s parameters.

    A `gluon.Trainer` passed in directly is returned unchanged. For a plain
    `mx.optimizer.Optimizer`, a Trainer is created once and reused so that the
    optimizer state is kept across batches.
    """
    if isinstance(optimizer, gluon.Trainer):
        return optimizer
    key = (id(net), id(optimizer))
    entry = _TRAINERS.get(key)
    if entry is None:
        entry = (net, optimizer, gluon.Trainer(net.collect_params(), optimizer))
        _TRAINERS[key] = entry
    return entry[2]


def trainBatch(net, criterion, optimizer, train_iter):
    """Run one optimisation step on the next batch from `train_iter`.

    Args:
        net: the Gluon network to train.
        criterion: loss called as `criterion(preds, text, preds_size, length)`.
        optimizer: an `mx.optimizer.Optimizer` or a ready-made `gluon.Trainer`.
        train_iter: iterator yielding `(images, texts)` batches.

    Returns:
        One-element NDArray holding the batch loss divided by the batch size.

    Raises:
        StopIteration: when `train_iter` is exhausted.
    """
    cpu_images, cpu_texts = next(train_iter)
    # NDArray exposes its dimensions through `.shape`; `.size` is an int
    # attribute, so the PyTorch-style `.size(0)` call cannot work here.
    batch_size = cpu_images.shape[0]
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    trainer = _get_trainer(net, optimizer)
    # Gradients are only tracked for operations executed inside this scope.
    with autograd.record():
        preds = net(image)
        # Every sample is given the full prediction length. This keeps the
        # original assumption that axis 0 of `preds` is the sequence axis.
        # NOTE(review): `criterion` must be built with a matching layout
        # (Gluon's CTCLoss defaults to 'NTC') - TODO confirm.
        preds_size = nd.array([preds.shape[0]] * batch_size, ctx=preds.context)
        cost = criterion(preds, text, preds_size, length).sum() / batch_size
    # No explicit zero_grad is needed: with Gluon's default grad_req='write'
    # each backward pass overwrites the previous gradients.
    cost.backward()
    # `cost` is already normalised by the batch size, so the trainer must not
    # rescale the gradients a second time.
    trainer.step(1)
    return cost


# Running sum and count of the batch losses since the last progress printout.
loss_total, loss_count = 0.0, 0

for epoch in range(opt.nepoch):
    train_iter = iter(train_loader)
    n_batches = len(train_loader)
    for i in range(1, n_batches + 1):
        cost = trainBatch(net, criterion, optimizer, train_iter)
        # Reduce to a Python float; this also waits for the asynchronous
        # MXNet computation of this batch to finish.
        loss_total += float(cost.sum().asscalar())
        loss_count += 1

        # argparse stores '--display-interval' as `display_interval` (likewise
        # for the other intervals); the camelCase attributes do not exist.
        if i % opt.display_interval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, n_batches, loss_total / loss_count))
            loss_total, loss_count = 0.0, 0

        if i % opt.val_interval == 0:
            # NOTE(review): `val` is not defined in any cell visible here; it
            # must be defined before this cell runs or this raises NameError.
            val(net, test_dataset, criterion)

        # do checkpointing
        if i % opt.save_interval == 0:
            # Gluon equivalent of torch.save(net.state_dict(), ...); the file
            # name is kept unchanged.
            net.save_parameters(
                '{0}/netCRNN_{1}_{2}.pth'.format(opt.expr_dir, epoch, i))

TypeError: 'Image' object is not iterable