In [1]:
# http://docs.chainer.org/en/stable/tutorial/basic.html

import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L

In [2]:
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)

In [3]:
x

<variable at 0x106558450>

In [4]:
y = x**2 - 2*x + 1

In [5]:
y

<variable at 0x106558d10>

In [6]:
y.data

array([ 16.], dtype=float32)

In [7]:
y.backward() # Runs reverse-mode automatic differentiation (back prop)

In [9]:
x.grad # gradient is stored in x

array([ 8.], dtype=float32)

In [10]:
z = 2*x
y = x**2 - z + 1
y.backward(retain_grad=True)
print z.grad
print x.grad

[-1.]
[ 16.]


In [11]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = x**2 - 2*x + 1
y.grad = np.ones((2, 3), dtype=np.float32) # set grad here, which will be brought back to x
y.backward()

x.grad

array([[  0.,   2.,   4.],
       [  6.,   8.,  10.]], dtype=float32)

In [32]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))

f = F.Linear(3, 2) # linear link (x*W + b) from 3 dim to 2 dim. Takes input: (minibatch, 3)

print f.W.data # Stores the parameters W and b
print f.b.data

y = f(x)

print y.data

y.grad = np.ones((2, 2), dtype=np.float32)
f.zerograds() # Gradients accumulate with backward(), have to zero them out?

y.backward()
print f.W.grad
print f.b.grad

[[ 1.4166379   0.6831221  -0.32881439]
 [-0.14903226  0.48673159 -1.64617753]]
[ 0.  0.]
[[ 1.79643905 -4.11410189]
 [ 7.10927582 -8.03953648]]
[[ 5.  7.  9.]
 [ 5.  7.  9.]]
[ 2.  2.]


In [44]:
# For an arbitrary number of child links use ChainList
class MyChain(ChainList):
    def __init__(self):
        super(MyChain, self).__init__(
            L.Linear(4, 3), # Notice each child link is not named! :-)
            L.Linear(3, 2),
        )

    def __call__(self, x):
        h = self[0](x)
        return self[1](h)

In [45]:
model = MyChain()
optimizer = optimizers.SGD()
optimizer.setup(model)

In [46]:
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005)) # Example hook function, called before update