In [11]:
import torch
from itertools import product

In [12]:
# Seed BEFORE building the layers: nn.Linear draws its initial weights from
# the global RNG, so without this the gradients printed further down change
# on every kernel restart.
torch.manual_seed(2019)

# Base model and a second model whose parameters will be nudged along v.
net = torch.nn.Linear(2, 1)
net_delta = torch.nn.Linear(2, 1)

# Direction v = (dw1, dw2, db) and finite-difference step size r.
v = torch.tensor([1.0, 2.0, 3.0])
r = 0.001

# theta_delta = theta + r * v.  The copy is done under no_grad so the
# parameter tensors stay leaf tensors and autograd does not record the update.
with torch.no_grad():
    net_delta.weight.copy_(net.weight + r * v[:2])  # (2,) broadcasts onto (1, 2)
    net_delta.bias.copy_(net.bias + r * v[2:])

In [13]:
# Fix the RNG so the synthetic batch below is the same on every run.
torch.manual_seed(2019)
x = torch.rand((10, 2))  # 10 samples, 2 features each
y = x.sum(dim=1)         # target: sum of the two features of each sample
def loss(y_hat, y):
    """Mean squared error between predictions and targets.

    Both tensors are flattened with ``view(-1)`` before being compared, so
    ``y_hat`` of shape (N, 1) can be paired with ``y`` of shape (N,).

    Args:
        y_hat: tensor of predictions.
        y: tensor of targets with the same number of elements.

    Returns:
        0-dim tensor: sum of squared residuals divided by ``y.numel()``.
    """
    residual = y_hat.view(-1) - y.view(-1)
    squared_error = residual * residual
    return squared_error.sum() / y.numel()

In [14]:
# Forward pass through both networks, then the loss at the base parameters
# (E) and at the perturbed parameters (E_delta).
y_hat = net(x)
y_hat_delta = net_delta(x)
E = loss(y_hat, y)
E_delta = loss(y_hat_delta, y)

In [15]:
# backward() ADDS into .grad rather than overwriting it.  Clear both networks
# first so that re-running the loss and backward cells does not double the
# gradients and corrupt the finite-difference estimate computed from them.
# On a fresh kernel this is a no-op.
net.zero_grad()
net_delta.zero_grad()

# Populate .grad of each network's parameters with the gradient of its loss.
E.backward()
E_delta.backward()

In [16]:
# Gradient of E with respect to the base network's weight, then its bias.
for grad in (net.weight.grad, net.bias.grad):
    print(grad)

tensor([[-1.2273, -1.2045]])
tensor([-2.3064])


In [17]:
# Gradient of E_delta with respect to the perturbed network's weight, then its bias.
for grad in (net_delta.weight.grad, net_delta.bias.grad):
    print(grad)

tensor([[-1.2225, -1.2004]])
tensor([-2.2976])


In [18]:
def _flat_grad(layer):
    """Return the layer's gradient as one 1-D tensor: weight entries, then bias."""
    return torch.cat((layer.weight.grad.reshape(-1), layer.bias.grad.reshape(-1)))


# Despite the names, w and w_delta hold GRADIENTS (at theta and theta + r*v),
# laid out in the same (w1, w2, b) order as v.
w = _flat_grad(net)
w_delta = _flat_grad(net_delta)

In [19]:
# Exact Hessian
#
# The loss is E = mean((w1*x1 + w2*x2 + b - y)^2).  Writing z = (x1, x2, 1)
# for the input augmented with a constant 1 for the bias, the Hessian with
# respect to (w1, w2, b) is 2 * mean(z z^T) = (2 / N) * Z^T Z.
# Building it as one matrix product replaces the previous approach of
# exec-ing nine string-formatted assignments.

n_samples = x.size(0)
x_aug = torch.cat((x, x.new_ones(n_samples, 1)), dim=1)  # append the bias column
H = 2 * x_aug.t().matmul(x_aug) / n_samples

print(H)

tensor([[0.6830, 0.4263, 1.0876],
        [0.4263, 0.5262, 0.8746],
        [1.0876, 0.8746, 2.0000]])


In [20]:
# Hessian-vector product two ways: the finite difference of the two gradients
# divided by the step size, and the explicit matrix-vector product.
hv_estimated = (w_delta - w) / r
hv_exact = H @ v
print("Estimated dot product ->", hv_estimated)
print("Actual dot product    ->", hv_exact)

Estimated dot product -> tensor([4.7984, 4.1026, 8.8367])
Actual dot product    -> tensor([4.7984, 4.1026, 8.8368])
