In [None]:
import os
import numpy as np

In [None]:
import chainer
import chainer.functions as F
from chainer import function, initializers
import chainer.links as L
from chainer import cuda

In [None]:
class KMaxPooling1D(function.Function):
    """K-max pooling along the single spatial axis of an ``(n, c, d)`` array.

    For every ``(batch, channel)`` row the ``k`` largest values along the last
    axis are selected and written out in the order in which they occur in the
    input, producing an array of shape ``(n, c, k)``.  When several elements
    are equal, the one with the smallest position is picked first (the kernel
    uses a strict ``<`` comparison).

    Only the GPU code paths (``forward_gpu`` / ``backward_gpu``) are
    implemented in this class.

    Note:
        If the spatial length ``d`` is smaller than ``k``, the missing output
        slots keep their initial value of negative infinity with an index of
        ``-1`` (and, having the smallest index, they end up first in the
        output).  No gradient flows through those slots.

    Args:
        ndim (int): Number of spatial dimensions of the input.  Must be 1,
            because the kernels take exactly one spatial extent.
        k (int): Number of values to keep per ``(batch, channel)`` row.  It is
            baked into the generated CUDA source as a C array length.

    Raises:
        ValueError: If ``ndim`` is not 1 or ``k`` is not positive.
    """

    def __init__(self, ndim, k):
        if ndim <= 0:
            raise ValueError(
                'pooling operation requires at least one spatial dimension.')
        if ndim != 1:
            # The kernels below receive a single spatial extent (d_0); any
            # other rank could only fail later inside the kernel invocation.
            raise ValueError(
                'KMaxPooling1D supports exactly one spatial dimension, '
                'got ndim={}.'.format(ndim))
        if k <= 0:
            # k is used as the length of fixed-size C arrays in the kernel.
            raise ValueError(
                'k must be a positive integer, got {}.'.format(k))

        self.ndim = ndim
        self.k = k

        # Retained for attribute compatibility; there is no cuDNN code path.
        self._used_cudnn = False

    def forward_gpu(self, x):
        """Select the k largest values of each row, preserving input order.

        Args:
            x (tuple): One-element tuple holding the ``(n, c, d)`` input array.

        Returns:
            tuple: One-element tuple holding the ``(n, c, k)`` output array.
            The position of every selected value is stored in
            ``self.indexes`` (int32, same shape) for the backward pass.
        """
        # The input itself is not needed for backward; only its shape/dtype
        # and the selected indexes are.
        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = (self.k,)

        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty(y_shape, dtype=np.int32)

        # One thread per output element.  Each thread:
        #   1. repeatedly scans its row for the largest value not yet taken
        #      (k selection passes),
        #   2. re-orders the k winners by ascending input position,
        #   3. writes out the winner belonging to its own output slot.
        cuda.elementwise('raw T in, int32 d_0, int32 out_0',
                         'T out, S indexes',
                         '''int c0 = i / (out_0);
                            int out_x_0 = i % out_0;
                            int in_x0_0 = 0;
                            int in_x1_0 = d_0;
                            int argmax_0[''' + str(self.k) + '''];
                            T maxval[''' + str(self.k) + '''];
                            for (int a = 0; a < out_0; ++a) {
                              maxval[a] = (T)-(1.0/0.0);
                              argmax_0[a] = -1;
                            }
                            for (int a = 0; a < out_0; ++a) {
                              for (int x_0 = in_x0_0; x_0 < in_x1_0; ++x_0) {
                                int offset_0 = 1 * (x_0 + d_0 * c0);
                                int found = 0;
                                for (int b = 0; b < a; ++b) {
                                  if (argmax_0[b] == x_0) {
                                    found = 1;
                                    break;
                                  }
                                }
                                if (found) {
                                  continue;
                                }
                                T v = in[offset_0];
                                if (maxval[a] < v) {
                                  maxval[a] = v;
                                  argmax_0[a] = x_0;
                                }
                              }
                            }
                            for (int a = 0; a < out_0; ++a) {
                              for (int b = a + 1; b < out_0; ++b) {
                                if (argmax_0[a] > argmax_0[b]) {
                                  T tmpval = maxval[a];
                                  int tmpindex = argmax_0[a];
                                  maxval[a] = maxval[b];
                                  argmax_0[a] = argmax_0[b];
                                  maxval[b] = tmpval;
                                  argmax_0[b] = tmpindex;
                                }
                              }
                            }
                            out = maxval[i % out_0];
                            int argmax_k_0 = argmax_0[i % out_0];
                            indexes = argmax_k_0;
                         ''',
                         'k_max_pool_1d_fwd')(
            x[0].reduced_view(),
            *(dims + ys +
              (y, self.indexes)))

        return y,

    def backward_gpu(self, x, gy):
        """Scatter the output gradient back to the selected input positions.

        Args:
            x (tuple): Inputs; unused here because ``forward_gpu`` retains
                none of them.
            gy (tuple): One-element tuple holding the ``(n, c, k)`` gradient
                with respect to the output.

        Returns:
            tuple: One-element tuple holding the gradient with respect to the
            input, with the shape and dtype recorded during the forward pass.
            Positions that were not selected receive zero.
        """
        dims = self._in_shape[2:]
        ys = gy[0].shape[2:]
        gx = cuda.cupy.empty(self._in_shape, self._in_dtype)

        # One thread per input element: sum the output gradients of every
        # output slot of the same row whose stored index equals this position.
        cuda.elementwise('raw T gy, raw S indexes, int32 d_0, int32 out_0',
                         'T gx',
                         '''int c0  = i / (d_0);
                            int x_0 = i % d_0;
                            int out_x0_0 = 0;
                            int out_x1_0 = out_0;
                            T val = 0;
                            for (int out_x_0 = out_x0_0; out_x_0 < out_x1_0; ++out_x_0) {
                              int offset_0 = 1 * (out_x_0 + out_0 * c0);
                              int kx = x_0;
                              if (indexes[offset_0] == kx) {
                                val = val + gy[offset_0];
                              }
                            }
                            gx = val;
                         ''',
                         'k_max_pool_1d_bwd')(
            gy[0].reduced_view(), self.indexes.reduced_view(),
            *(dims + ys + (gx,)))

        return gx,

In [None]:
def k_max_pooling_1d(x, k):
    """Apply :class:`KMaxPooling1D` to ``x``.

    Args:
        x: Input exposing a ``shape`` attribute; every axis after the first
            two is counted as a spatial axis.
        k: Number of values to keep, forwarded to ``KMaxPooling1D``.

    Returns:
        Whatever calling the ``KMaxPooling1D`` function object on ``x``
        returns.
    """
    spatial_axes = x.shape[2:]
    pooling = KMaxPooling1D(len(spatial_axes), k)
    return pooling(x)

In [None]:
class BottleNeckA(chainer.Chain):
    """Residual block with a projection shortcut.

    Main path:  conv(ksize=3) -> BN -> ReLU -> conv(ksize=3) -> BN
    Shortcut:   conv(ksize=1, no bias) -> BN
    The two paths are summed and passed through a final ReLU.  The shortcut
    convolution maps ``in_size`` channels to ``ch_size`` channels so the sum
    is well defined.

    Args:
        in_size (int): Number of input channels.
        ch_size (int): Number of output channels.
    """

    def __init__(self, in_size, ch_size):
        he_normal = initializers.HeNormal()

        # Links are created in the same order as they are listed so that
        # weight initialisation draws random numbers in a fixed sequence.
        main_conv_a = L.ConvolutionND(
            1, in_size, ch_size, ksize=3, stride=1, pad=1,
            initialW=he_normal)
        main_bn_a = L.BatchNormalization(ch_size)
        main_conv_b = L.ConvolutionND(
            1, ch_size, ch_size, ksize=3, stride=1, pad=1,
            initialW=he_normal)
        main_bn_b = L.BatchNormalization(ch_size)
        shortcut_conv = L.ConvolutionND(
            1, in_size, ch_size, ksize=1, stride=1, pad=0,
            initialW=he_normal, nobias=True)
        shortcut_bn = L.BatchNormalization(ch_size)

        super(BottleNeckA, self).__init__(
            conv1=main_conv_a,
            bn1=main_bn_a,
            conv2=main_conv_b,
            bn2=main_bn_b,
            conv3=shortcut_conv,
            bn3=shortcut_bn,
        )

    def __call__(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.bn2(self.conv2(residual))

        shortcut = self.bn3(self.conv3(x))

        return F.relu(residual + shortcut)

In [None]:
class BottleNeckB(chainer.Chain):
    """Residual block with an identity shortcut.

    Main path:  conv(ksize=3) -> BN -> ReLU -> conv(ksize=3) -> BN
    The input is added back unchanged and the sum goes through a ReLU, so the
    channel count stays at ``ch_size`` throughout.

    Args:
        ch_size (int): Number of input and output channels.
    """

    def __init__(self, ch_size):
        he_normal = initializers.HeNormal()

        # Same construction order as the keyword list below.
        first_conv = L.ConvolutionND(
            1, ch_size, ch_size, ksize=3, stride=1, pad=1,
            initialW=he_normal)
        first_bn = L.BatchNormalization(ch_size)
        second_conv = L.ConvolutionND(
            1, ch_size, ch_size, ksize=3, stride=1, pad=1,
            initialW=he_normal)
        second_bn = L.BatchNormalization(ch_size)

        super(BottleNeckB, self).__init__(
            conv1=first_conv,
            bn1=first_bn,
            conv2=second_conv,
            bn2=second_bn,
        )

    def __call__(self, x):
        """Return ``relu(main_path(x) + x)``."""
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.bn2(self.conv2(residual))

        return F.relu(residual + x)

In [None]:
class ConvBlock(chainer.ChainList):
    """A stack of residual blocks applied one after another.

    The first block is a ``BottleNeckA`` when the channel count changes
    (``in_size != ch_size``) and a ``BottleNeckB`` otherwise.  It is followed
    by ``layer - 1`` further ``BottleNeckB`` blocks.

    Args:
        layer (int): Total number of residual blocks (at least one block is
            always created).
        in_size (int): Number of input channels.
        ch_size (int): Number of output channels.
    """

    def __init__(self, layer, in_size, ch_size):
        super(ConvBlock, self).__init__()

        if in_size == ch_size:
            entry = BottleNeckB(ch_size)
        else:
            entry = BottleNeckA(in_size, ch_size)
        self.add_link(entry)

        # Remaining blocks keep the channel count unchanged.
        for _ in range(1, layer):
            self.add_link(BottleNeckB(ch_size))

    def __call__(self, x):
        """Feed ``x`` through every child block in registration order."""
        h = x
        for block in self.children():
            h = block(h)
        return h

In [None]:
class VDCNN(chainer.Chain):
    """Character-level deep convolutional text classifier.

    Pipeline: ID embedding -> 1-D convolution -> four residual stages
    (64, 128, 256, 512 channels) with max pooling between them -> k-max
    pooling (k=8) -> three fully connected layers.

    Args:
        n_out (int): Number of output classes.
    """

    def __init__(self, n_out):
        super(VDCNN, self).__init__(
            # IDs equal to -1 (padding) are ignored by the embedding.
            embed1=L.EmbedID(3017, 50, initialW=initializers.HeUniform(),
                             ignore_label=-1),
            conv1=L.ConvolutionND(1, 50, 64, 3, 1, 1,
                                  initialW=initializers.HeNormal()),
            res2=ConvBlock(2, 64, 64),
            res3=ConvBlock(2, 64, 128),
            res4=ConvBlock(2, 128, 256),
            res5=ConvBlock(2, 256, 512),
            # 4096 = 512 channels * 8 values kept by k-max pooling.
            fc6=L.Linear(4096, 2048),
            fc7=L.Linear(2048, 2048),
            fc8=L.Linear(2048, n_out)
        )

    def __call__(self, x, t=None):
        """Compute the training loss, or class probabilities when ``t`` is None.

        Args:
            x: Batch of ID sequences fed to the embedding layer; the embedded
                result is transposed from (batch, length, embed) to
                (batch, embed, length) before the convolutions.
            t: Ground-truth labels, or ``None`` for inference.

        Returns:
            The softmax cross-entropy loss when ``t`` is given (loss and
            accuracy are also reported through ``chainer.report``); otherwise
            the softmax probabilities copied to host memory.
        """
        h = self.embed1(x)
        # Move the embedding axis into the channel position for ConvolutionND.
        h = h.transpose(0, 2, 1)
        h = self.conv1(h)
        h = self.res2(h)
        h = F.max_pooling_nd(h, 3, 2, 1, cover_all=False)
        h = self.res3(h)
        h = F.max_pooling_nd(h, 3, 2, 1, cover_all=False)
        h = self.res4(h)
        h = F.max_pooling_nd(h, 3, 2, 1, cover_all=False)
        h = self.res5(h)
        h = k_max_pooling_1d(h, 8)
        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        h = self.fc8(h)

        if t is not None:
            loss = F.softmax_cross_entropy(h, t)
            chainer.report({'loss': loss, 'accuracy': F.accuracy(h, t)}, self)
            return loss

        # ``to_cpu`` lives in chainer.cuda; the bare name was undefined and
        # raised NameError on every inference call.
        return cuda.to_cpu(F.softmax(h).data)

In [None]:
# Every sentence is stored as a fixed-length row of IDs.
MAX_SENTENCE_LEN = 1024
# Filler for unused positions; matches ``ignore_label=-1`` of the embedding.
PAD_ID = -1

_labels = []
_sentences = []
len_sentence = 0  # running total of tokens seen across all sentences

# ``with`` guarantees the file is closed even if parsing a line fails.
with open('./data/afpbb-id.txt') as text_file:
    for line in text_file:
        if not line.strip():
            # A blank line has no label and would break the int32 conversion
            # of the labels later on.
            continue

        # The label is the first character of the line.  The first and last
        # space-separated fields are dropped (the first holds the label; the
        # last is presumably the trailing newline field -- confirm against
        # the data file).
        label, sentence = line[0], line.split(' ')[1:-1]
        len_sentence += len(sentence)

        id_sentence = np.full(MAX_SENTENCE_LEN, PAD_ID, dtype=np.float32)
        # Truncate over-long sentences instead of raising IndexError.
        for i, c in enumerate(sentence[:MAX_SENTENCE_LEN]):
            id_sentence[i] = c

        _labels.append(label)
        _sentences.append(id_sentence)

In [None]:
# Turn the per-line Python lists into dense int32 arrays.
_sentences = np.asarray(_sentences, dtype=np.int32)
_labels = np.asarray(_labels, dtype=np.int32)

In [None]:
# Sanity check: one row per loaded sentence, each row 1024 IDs long.
_sentences.shape

In [None]:
from chainer.datasets import TupleDataset

In [None]:
# Hold out every tenth example (positions 0, 10, 20, ...) for testing and
# train on the rest.
is_test = np.mod(np.arange(len(_labels)), 10) == 0

train = TupleDataset(*(arr[~is_test] for arr in (_sentences, _labels)))

test = TupleDataset(*(arr[is_test] for arr in (_sentences, _labels)))

In [None]:
# Drop the module-level names of the full arrays now that the split datasets
# exist.  The loading cells must be re-run before the split cell can run again.
del _sentences, _labels

In [None]:
# Build the classifier with four output classes (n_out=4).
model = VDCNN(4)

In [None]:
from chainer import training
from chainer.training import extensions

In [None]:
# Training configuration, gathered in one place so it is easy to tune.
BATCH_SIZE = 50
GPU_ID = 0                       # CUDA device used for training and evaluation
N_EPOCH = 30
GRAD_CLIP = 5.0                  # threshold passed to GradientClipping
LR_DECAY_RATE = 0.5              # learning rate is multiplied by this ...
LR_DECAY_INTERVAL = (3, 'epoch')  # ... every time this trigger fires
OUT_DIR = 'result/afpbb-30epoch'

train_iter = chainer.iterators.MultiprocessIterator(
    train, batch_size=BATCH_SIZE)
# repeat=False: the evaluator makes exactly one pass over the test set.
test_iter = chainer.iterators.MultiprocessIterator(
    test, batch_size=BATCH_SIZE, repeat=False)

optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(GRAD_CLIP))

updater = training.StandardUpdater(train_iter, optimizer, device=GPU_ID)
trainer = training.Trainer(updater, (N_EPOCH, 'epoch'), out=OUT_DIR)


class TestModeEvaluator(extensions.Evaluator):
    """Evaluator that runs with ``chainer.config.train`` set to False."""

    def evaluate(self):
        """Run the parent evaluation inside a ``train=False`` config scope."""
        with chainer.using_config('train', False):
            ret = super(TestModeEvaluator, self).evaluate()
        return ret


trainer.extend(TestModeEvaluator(test_iter, model, device=GPU_ID),
               trigger=(1, 'epoch'))
trainer.extend(extensions.dump_graph('main/loss'))

trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))

trainer.extend(
    extensions.PrintReport(['epoch', 'iteration', 'main/accuracy',
                            'validation/main/accuracy', 'elapsed_time']),
    trigger=(1, 'epoch'))

trainer.extend(extensions.ProgressBar(update_interval=30))

trainer.extend(extensions.ExponentialShift('lr', LR_DECAY_RATE),
               trigger=LR_DECAY_INTERVAL)

print('running')

trainer.run()

In [None]:
from chainer import serializers

MODEL_PATH = './models/afpbb-epoch30.npz'

# Create the target directory first: without it the save fails and the whole
# training run above would be lost.
model_dir = os.path.dirname(MODEL_PATH)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

serializers.save_npz(MODEL_PATH, model)