## Chainer Tutorial

Ref: http://docs.chainer.org/en/latest/tutorial/basic.html#

In [1]:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

## Forward/Backward Computation

In [2]:
# Wrap a one-element float32 array in a Variable so that computations on it
# can be backpropagated through later.
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)

In [3]:
# Forward computation: y = x^2 - 2x + 1, built from arithmetic on the Variable x.
y = x**2 - 2*x + 1

In [4]:
# Show the array holding the forward result.
y.data

array([ 16.], dtype=float32)

In [5]:
# Run error backpropagation starting from y; the gradient with respect to x
# is then read from x.grad.
y.backward()

In [6]:
# Gradient of y with respect to x (analytically dy/dx = 2x - 2).
x.grad

array([ 8.], dtype=float32)

In [7]:
# Gradient of an intermediate variable: z is bound to its own name so that
# its gradient can be inspected once backward has run.
z = 2*x
y = x**2 - z + 1
# retain_grad=True is passed so that z.grad is still populated afterwards.
y.backward(retain_grad=True)
z.grad

array([-1.], dtype=float32)

In [8]:
# Same procedure with a 2x3 input: the output gradient is seeded by hand with
# ones of matching shape before backward is called, then x.grad is displayed
# (analytically dy/dx = 2x + 2).
x = Variable(np.array([[1,2,3],[4,5,6]], dtype=np.float32))
y = x**2 + 2*x + 1
y.grad = np.ones((2,3), dtype=np.float32)
y.backward()
x.grad

array([[  4.,   6.,   8.],
       [ 10.,  12.,  14.]], dtype=float32)

## Links

In [9]:
# Create a Linear link with arguments (3, 2); its parameters W and b are
# inspected in the following cells.
f = L.Linear(3,2)

In [10]:
# Show the weight matrix W of the link.
f.W.data

array([[ 0.09941121, -0.78036231,  0.34611559],
       [ 0.0280849 ,  0.53641611, -0.10282112]], dtype=float32)

In [11]:
# Show the bias vector b of the link.
f.b.data

array([ 0.,  0.], dtype=float32)

In [12]:
# Apply the link to a mini-batch of two 3-element rows and show the output.
x = Variable(np.array([[1,2,3], [4,5,6]], dtype=np.float32))
y = f(x)
y.data

array([[-0.4229666 ,  0.79245377],
       [-1.42747307,  2.17749357]], dtype=float32)

In [13]:
# Clear the link's parameter gradients before running a new backward pass.
f.cleargrads()

In [14]:
# Seed the output gradient with ones, backpropagate through the link, and
# show the gradient computed for W.
y.grad = np.ones((2,2), dtype=np.float32)
y.backward()
f.W.grad

array([[ 5.,  7.,  9.],
       [ 5.,  7.,  9.]], dtype=float32)

In [15]:
# Show the gradient computed for the bias b by the same backward pass.
f.b.grad

array([ 2.,  2.], dtype=float32)

## Write a model as a chain

In [16]:
# Two free-standing links, chained together by a plain function.
l1 = L.Linear(4, 3)
l2 = L.Linear(3, 2)


def my_forward(x):
    """Feed ``x`` through ``l1`` and pass the result on to ``l2``."""
    return l2(l1(x))

In [17]:
# Write as a class
class MyProc(object):
    """Plain-Python container for two linear links applied in sequence.

    The links are stored as the attributes ``l1`` and ``l2``; ``forward``
    applies ``l1`` to the input and then ``l2`` to the intermediate result.
    """

    def __init__(self):
        self.l1 = L.Linear(4, 3)
        self.l2 = L.Linear(3, 2)

    def forward(self, x):
        """Apply ``l1`` and then ``l2`` to ``x`` and return the result."""
        h = self.l1(x)
        return self.l2(h)

    # The method was originally misspelled ``foward``; the old name is kept as
    # an alias so any existing caller continues to work.
    foward = forward

In [18]:
# Deriving from Chain is more reusable: it supports parameter management,
# CPU/GPU migration, etc.
class MyChain(Chain):
    """Chain holding two linear links, ``l1`` and ``l2``, applied in sequence."""

    def __init__(self):
        super(MyChain, self).__init__(
            l1=L.Linear(4, 3),
            l2=L.Linear(3, 2),
        )

    def __call__(self, x):
        """Return ``l2(l1(x))``."""
        return self.l2(self.l1(x))

In [19]:
# Alternative definition: a ChainList addresses its child links by index
# instead of by attribute name.
class MyChain2(ChainList):
    """ChainList holding two linear links that are applied in index order."""

    def __init__(self):
        super(MyChain2, self).__init__(
            L.Linear(4, 3),
            L.Linear(3, 2),
        )

    def __call__(self, x):
        """Return the second link applied to the output of the first."""
        return self[1](self[0](x))

## Optimizer

In [20]:
# Create a MyChain model and an SGD optimizer, then attach the optimizer to
# the model with setup().
model = MyChain()
optimizer = optimizers.SGD()
# NOTE(review): use_cleargrads() looks version-specific — confirm it is still
# required (or available) in the installed Chainer release.
optimizer.use_cleargrads()
optimizer.setup(model)

In [21]:
# Register a WeightDecay hook, constructed with 0.0005, on the optimizer.
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

## Trainer

## Serializer

In [22]:
# Save the model in NPZ format under the name 'my.model'.
serializers.save_npz('my.model', model)

In [23]:
# Load the values saved in 'my.model' back into the model object.
serializers.load_npz('my.model', model)

In [24]:
# The optimizer is saved the same way as the model, under 'my.state'.
serializers.save_npz('my.state', optimizer)

In [25]:
# Load the values saved in 'my.state' back into the optimizer object.
serializers.load_npz('my.state', optimizer)

## Example: Multi-layer Perceptron on MNIST

In [26]:
# Fetch the MNIST dataset as a (train, test) pair.
train, test = datasets.get_mnist()

In [27]:
# Training iterator: shuffled mini-batches of 100 examples.
train_iter = iterators.SerialIterator(
    train,
    batch_size=100,
    shuffle=True,
)

In [28]:
# Test iterator: mini-batches of 100, with repeat and shuffle both disabled.
test_iter = iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)

In [29]:
class MLP(Chain):
    """Multi-layer perceptron: three linear links with ReLU after the first two.

    Args:
        n_units: output size passed to the first and second linear links.
        n_out: output size passed to the third linear link.
    """

    def __init__(self, n_units, n_out):
        # Input sizes are given as None so that each layer infers its own.
        super(MLP, self).__init__(
            l1=L.Linear(None, n_units),
            l2=L.Linear(None, n_units),
            l3=L.Linear(None, n_out),
        )

    def __call__(self, x):
        """Return ``l3(relu(l2(relu(l1(x)))))``."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)

In [30]:
# Wrap an MLP(100, 10) in L.Classifier and attach a fresh SGD optimizer to it.
# Note: this rebinds `model` and `optimizer`, replacing the MyChain-based
# objects created in the Optimizer section.
model = L.Classifier(MLP(100, 10)) # the input size, 784, is inferred
optimizer = optimizers.SGD()
optimizer.setup(model)

## Build a trainer object

In [31]:
# The updater is built from the training iterator and the optimizer; the
# trainer receives the updater, the stop trigger (20, 'epoch'), and
# out='result' as its output location.
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')

In [32]:
# Register the evaluation and reporting extensions in order, then start training.
reported_keys = ['epoch', 'main/accuracy', 'validation/main/accuracy']
for extension in (
    extensions.Evaluator(test_iter, model),
    extensions.LogReport(),
    extensions.PrintReport(reported_keys),
    extensions.ProgressBar(),
):
    trainer.extend(extension)
trainer.run()

epoch       main/accuracy  validation/main/accuracy
[J     total [..................................................]  0.83%
this epoch [########..........................................] 16.67%
       100 iter, 0 epoch / 20 epochs
       inf iters/sec. Estimated time to finish: 0:00:00.
[4A[J     total [..................................................]  1.67%
this epoch [################..................................] 33.33%
       200 iter, 0 epoch / 20 epochs
    207.52 iters/sec. Estimated time to finish: 0:00:56.861384.
[4A[J     total [#.................................................]  2.50%
this epoch [#########################.........................] 50.00%
       300 iter, 0 epoch / 20 epochs
    205.42 iters/sec. Estimated time to finish: 0:00:56.956418.
[4A[J     total [#.................................................]  3.33%
this epoch [#################################.................] 66.67%
       400 iter, 0 epoch / 20 epochs
    207.39 iters/sec. E