In [1]:
import xshinnosuke as xs

# Simple Add, Multiply, etc.

In [5]:
# Seed the library RNG first (presumably this makes the randn draws below repeatable).
xs.seed(0)
# Operands for the arithmetic demo; requires_grad=True is set on every tensor.
# Shapes follow the constructor arguments: a is (3, 4), b is (4, 2), c is (1, 3).
a = xs.randn(3, 4, requires_grad=True)
b = xs.ones(4, 2, requires_grad=True)
c = xs.randn(1, 3, requires_grad=True)
# Scalar that the loss in the next cell is measured against.
y = xs.tensor(10, requires_grad=True)

In [6]:
# Forward pass: chain a few operations and finish with a scalar loss.
# (3, 2) = (3, 4) * (4, 2)
# NOTE(review): these shapes imply `*` behaves like a matrix product here -
# confirm against xshinnosuke's operator semantics.
x1 = a * b
# (2, 3) = (2, 3) * (1, 3); c has a single row, so it is presumably broadcast
x2 = x1.t() * c
# (2, ): mean taken along axis 1
x3 = x2.mean(axis=1)
# sum of the two entries of x3
x4 = x3.sum()
# squared difference between x4 and the target y
loss = (x4 - y) ** 2
print(loss)

Variable(105.37674866848246, requires_grad=True, grad_fn=<PowBackward>)


In [7]:
# Intermediate results whose gradients are inspected, from the one closest to
# the loss back towards the inputs.
intermediates = (x4, x3, x2, x1)

# Ask every intermediate to keep its gradient before running backward().
for node in intermediates:
    node.retain_grad()
loss.backward()

# Print the gradients in the same order: x4, x3, x2, x1.
for node in intermediates:
    print(node.grad)

-20.53063551558816
[-20.53063552 -20.53063552]
[[-6.84354517 -6.84354517 -6.84354517]
 [-6.84354517 -6.84354517 -6.84354517]]
[[2.44424024 8.97854219]
 [0.73130437 2.44424024]
 [8.97854219 0.73130437]]


# Fully Connected Demo

In [8]:
from xshinnosuke.layers import Dense

# Reseed before drawing the input (presumably so the demo is repeatable).
xs.seed(0)
# (1, 5) input; its gradient is inspected after backward() in the following cells.
inputs = xs.randn(1, 5, requires_grad=True)

In [9]:
# Dense layer with 2 output features, applied to the (1, 5) input.
fc_layer = Dense(out_features=2)
pred1 = fc_layer(inputs)
# Reduce the prediction to a single loss value.
loss1 = pred1.sum()
print('loss1: ', loss1)
loss1.backward()
# Gradient of loss1 with respect to inputs; the manual version in the next
# cell is expected to print the same values.
print('inputs grad: ', inputs.grad)

loss1:  Variable(0.10376752480048798, requires_grad=True, grad_fn=<SumBackward>)
inputs grad:  [[-0.15511417  0.03160216  0.13236604 -0.16691323  0.04000644]]


In [10]:
# Clear the gradient left on inputs by the previous backward pass
# (assumed effect of zero_grad), so the two runs can be compared directly.
inputs.zero_grad()
# Reuse the Dense layer's own parameters
weight, bias = fc_layer.variables
# Manually apply the fully connected operation: inputs . weight + bias
pred2 = inputs.dot(weight) + bias
loss2 = pred2.sum()
print('loss2: ', loss2)
loss2.backward()
# Expected to match the gradient printed for the Dense layer call.
print('inputs grad: ', inputs.grad)

loss2:  Variable(0.10376752480048798, requires_grad=True, grad_fn=<SumBackward>)
inputs grad:  [[-0.15511417  0.03160216  0.13236604 -0.16691323  0.04000644]]


# Convolutional Demo

In [2]:
from xshinnosuke.layers import Conv2D

# Seed the RNG as the earlier demos do, so the random input drawn here is
# repeatable on Restart & Run All. Previously recorded outputs were produced
# without a seed and will differ after a re-run.
xs.seed(0)
# (batch, channels, height, width)
inputs = xs.randn(1, 1, 5, 5, requires_grad=True)
# Side length of the square convolution kernel used by the next two cells.
kernel_size = 3

In [3]:
# Single-output-channel convolution without a bias term.
conv_layer = Conv2D(out_channels=1, kernel_size=kernel_size, use_bias=False)
pred1 = conv_layer(inputs)
# Reduce the feature map to a single loss value via its mean.
loss1 = pred1.mean()
print('loss1: ', loss1)
loss1.backward()
# Gradient of loss1 with respect to inputs; the manual convolution in the
# next cell is expected to print the same values.
print('inputs grad: ', inputs.grad)

loss1:  Variable(0.09235512061338991, requires_grad=True, grad_fn=<MeanBackward>)
inputs grad:  [[[[ 0.00259424 -0.01035408  0.00319068  0.00059644  0.01354475]
   [ 0.00427474  0.01671126  0.02760327  0.02332854  0.01089201]
   [-0.00902506 -0.00531487  0.01823331  0.02725837  0.02354817]
   [-0.01161929  0.00503921  0.01504263  0.02666193  0.01000342]
   [-0.0132998  -0.02202613 -0.00936997  0.00392983  0.01265616]]]]


In [4]:
# Start from a clean gradient on inputs before repeating the computation by hand.
inputs.zero_grad()
# Take the layer's kernel; the second entry of `variables` is not needed here.
weight, _ = conv_layer.variables
# Hand-written convolution: fill a 3x3 result one position at a time.
pred2 = xs.zeros(3, 3, requires_grad=True)
for row in range(3):
    for col in range(3):
        # Window of the input covered by the kernel at this output position.
        window = inputs[:, :, row: row + kernel_size, col: col + kernel_size]
        pred2[row, col] = (window * weight).sum()
loss2 = pred2.mean()
print('loss2: ', loss2)
loss2.backward()
print('inputs grad: ', inputs.grad)

loss2:  Variable(0.09235512061338988, requires_grad=True, grad_fn=<MeanBackward>)
inputs grad:  [[[[ 0.00259424 -0.01035408  0.00319068  0.00059644  0.01354475]
   [ 0.00427474  0.01671126  0.02760327  0.02332854  0.01089201]
   [-0.00902506 -0.00531487  0.01823331  0.02725837  0.02354817]
   [-0.01161929  0.00503921  0.01504263  0.02666193  0.01000342]
   [-0.0132998  -0.02202613 -0.00936997  0.00392983  0.01265616]]]]


# Pooling Demo

In [5]:
from xshinnosuke.layers import MaxPooling2D

# Seed the RNG as the earlier demos do, so the random input drawn here is
# repeatable on Restart & Run All. Previously recorded outputs were produced
# without a seed and will differ after a re-run.
xs.seed(0)
# (batch, channels, height, width)
inputs = xs.randn(1, 1, 4, 4, requires_grad=True)
# Pooling window side length and step, shared by the next two cells.
kernel_size = 2
stride = 2

In [6]:
# Max pooling with the window size and stride defined in the setup cell.
pool_layer = MaxPooling2D(kernel_size=kernel_size, stride=stride)
pred1 = pool_layer(inputs)
# Reduce the pooled map to a single loss value via its mean.
loss1 = pred1.mean()
print('loss1: ', loss1)
loss1.backward()
# Gradient of loss1 with respect to inputs; in the recorded output it is 0.25
# at one position per pooling window and 0 elsewhere.
print('inputs grad: ', inputs.grad)

loss1:  Variable(1.0085125512257545, requires_grad=True, grad_fn=<MeanBackward>)
inputs grad:  [[[[0.   0.   0.   0.25]
   [0.25 0.   0.   0.  ]
   [0.   0.   0.25 0.  ]
   [0.   0.25 0.   0.  ]]]]


In [7]:
# Start from a clean gradient on inputs before repeating the pooling by hand.
inputs.zero_grad()
# Hand-written max pooling: one maximum per window, written into a 2x2 result.
pred2 = xs.zeros(2, 2, requires_grad=True)
for row in range(2):
    # First input row covered by this output row.
    top = row * stride
    for col in range(2):
        # First input column covered by this output column.
        left = col * stride
        patch = inputs[:, :, top: top + kernel_size, left: left + kernel_size]
        pred2[row, col] = patch.max()
loss2 = pred2.mean()
print('loss2: ', loss2)
loss2.backward()
print('inputs grad: ', inputs.grad)

loss2:  Variable(1.0085125512257545, requires_grad=True, grad_fn=<MeanBackward>)
inputs grad:  [[[[0.   0.   0.   0.25]
   [0.25 0.   0.   0.  ]
   [0.   0.   0.25 0.  ]
   [0.   0.25 0.   0.  ]]]]
