# 2. Variable and Autograd

In [1]:
import torch

## 2.1 Autograd 

In [2]:
# A tensor created without requires_grad=True does not track gradients (displays False).
x = torch.ones(1)
x.requires_grad

False

In [3]:
# A second tensor, likewise created without gradient tracking.
y = torch.ones(1)
y.requires_grad

False

In [4]:
# Sum of two tensors, neither of which tracks gradients at this point.
z = x + y

In [5]:
# No input tracks gradients, so the result does not either (displays False).
z.requires_grad

False

In [6]:
# Switch gradient tracking on for x in place; the flag now reads True.
x.requires_grad_()
x.requires_grad

True

In [7]:
# Recompute the sum now that x tracks gradients (y still does not).
z = x + y

In [8]:
# One gradient-tracking input is enough for the result to require gradients (displays True).
z.requires_grad

True

## 2.2 Backward

In [9]:
# 2x2 float32 leaf tensor with gradient tracking enabled at creation time.
y = torch.tensor(
    [[1.0, 2.0], [3.0, 4.0]],
    dtype=torch.float32,
    requires_grad=True,
)
y

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [10]:
# Reducing a gradient-tracking tensor yields a result that carries a grad_fn.
y.sum()

tensor(10., grad_fn=<SumBackward0>)

In [11]:
# Same for max(): the result records the operation that produced it in grad_fn.
y.max()

tensor(4., grad_fn=<MaxBackward1>)

In [12]:
# Element-wise function of y; z is a non-leaf tensor whose grad_fn is the final add.
z = 2*y + 1
z

tensor([[3., 5.],
        [7., 9.]], grad_fn=<AddBackward0>)

In [13]:
# Autograd attributes of the leaf tensor y and the derived tensor z before any
# backward pass. end="\n\n" emits the blank line that separates the groups.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad, end="\n\n")

print("y.grad :", y.grad)
print("z.grad :", z.grad, end="\n\n")

print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)

y.requires_grad : True
z.requires_grad : True

y.grad : None
z.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x0000020BABE100B8>


In [14]:
# backward() can create the initial gradient implicitly only for scalar outputs.
# z is a 2x2 tensor, so this call raises a RuntimeError. The error is the point
# of the demonstration, so it is caught and displayed instead of aborting a
# Restart & Run All of the notebook.
try:
    z.backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: grad can be implicitly created only for scalar outputs

In [15]:
# Reduce z to a scalar so that backward() can be called without an explicit gradient.
out = z.sum()
out

tensor(24., grad_fn=<SumBackward0>)

In [16]:
# Autograd attributes of y, z and the scalar out, still before any backward pass.
# end="\n\n" emits the blank line that separates the groups.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad)
print("out.requires_grad :", out.requires_grad, end="\n\n")

print("y.grad :", y.grad)
print("z.grad :", z.grad)
print("out.grad :", out.grad, end="\n\n")

print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)
print("out.grad_fn :", out.grad_fn)

y.requires_grad : True
z.requires_grad : True
out.requires_grad : True

y.grad : None
z.grad : None
out.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x0000020BABE7F9B0>
out.grad_fn : <SumBackward0 object at 0x0000020BABE7FB38>


In [17]:
# By default, autograd keeps .grad only for leaf tensors; gradients of non-leaf
# (intermediate) tensors such as z are not retained for later inspection, by
# design, to save memory. A hook is used below to capture z's gradient instead.

zGrad = torch.zeros(2,2)

def extract(z):
    """Hook callback: store the gradient it receives in the global ``zGrad``.

    The parameter ``z`` is the gradient flowing into the tensor z during the
    backward pass (it shadows the global of the same name), not z itself.
    """
    global zGrad
    zGrad = z
    
# Register extract as a hook on z; the call returns a handle (shown as the cell output).
z.register_hook(extract)
# Alternative: z.register_hook(print) would print the gradient instead of storing it.

<torch.utils.hooks.RemovableHandle at 0x20babe7c1d0>

In [18]:
# Backpropagate from the scalar; retain_graph=True keeps the graph so that
# backward() can be called on out again in a later cell.
out.backward(retain_graph=True)

In [19]:
# Same report after the backward pass: only the leaf y now holds a gradient,
# while the non-leaf tensors z and out still show None.
# end="\n\n" emits the blank line that separates the groups.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad)
print("out.requires_grad :", out.requires_grad, end="\n\n")

print("y.grad :", y.grad)
print("z.grad :", z.grad)
print("out.grad :", out.grad, end="\n\n")

print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)
print("out.grad_fn :", out.grad_fn)

y.requires_grad : True
z.requires_grad : True
out.requires_grad : True

y.grad : tensor([[2., 2.],
        [2., 2.]])
z.grad : None
out.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x0000020BABE82240>
out.grad_fn : <SumBackward0 object at 0x0000020BABE822B0>


In [20]:
# Gradient of out with respect to z, captured by the hook (all ones, since out = z.sum()).
print(zGrad)

tensor([[1., 1.],
        [1., 1.]])


In [21]:
# The graph was retained by the previous call, so backward() can run again.
# Gradients accumulate into y.grad across calls, so the value doubles from 2 to 4.
out.backward(retain_graph=True)
y.grad

tensor([[4., 4.],
        [4., 4.]])

In [22]:
# Reset the accumulated gradient in place; the following backward pass then
# yields the gradient of a single pass again.
y.grad.zero_()
out.backward()
y.grad

tensor([[2., 2.],
        [2., 2.]])

## 2.3 .detach vs .data (참고)

@ .detach()로 얻은 tensor는 requires_grad가 False이지만 원본과 메모리를 공유하며, in-place 수정은 autograd가 추적하므로 역전파 시 에러로 감지됨

In [23]:
# c is a detached alias of b: zeroing c in place also overwrites b's values.
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.exp(a)
c = b.detach()
c.zero_()

tensor([0., 0., 0.])

In [24]:
# b now shows zeros as well: the in-place zeroing of c changed b's values.
b

tensor([0., 0., 0.], grad_fn=<ExpBackward>)

In [25]:
# exp needs its own output to compute its gradient. That output (b) was
# overwritten in place through the detached tensor c, so the backward pass
# raises a RuntimeError. The error is the point of the demonstration, so it is
# caught and displayed instead of aborting a Restart & Run All of the notebook.
try:
    b.sum().backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

In [26]:
# The backward pass above failed, so no gradient was stored (the cell shows no output).
a.grad

@ .data로 얻은 tensor도 원본과 메모리를 공유하지만 in-place 수정을 autograd가 추적하지 않으므로, 에러 없이 잘못된 gradient가 계산될 수 있음 (아래에서 a.grad가 exp(a)가 아닌 0으로 나옴)

In [27]:
# Same experiment as with detach(), but the alias of b is obtained through .data.
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.exp(a)
c = b.data
c.zero_()

tensor([0., 0., 0.])

In [28]:
# As before, b's values were overwritten by the in-place zeroing of c.
b

tensor([0., 0., 0.], grad_fn=<ExpBackward>)

In [29]:
# This time no error is raised: the in-place change made through .data goes unnoticed.
b.sum().backward()

In [30]:
# Zeros instead of exp(a): the gradient was computed from the overwritten b, i.e. it is silently wrong.
a.grad

tensor([0., 0., 0.])