In [1]:
import torch
import numpy as np

# Gradient Calculation

* Backward Propagation

In [9]:
# A tensor only takes part in gradient computation when it is created with
# requires_grad=True (the default is False). Autograd then tracks the
# operations applied to it, and backward() stores the result in x.grad.
x = torch.randn(3, requires_grad=True)
print(x)

# Every tensor derived from x records the operation that produced it in grad_fn.
y = x + 2
print(y)  # grad_fn=<AddBackward0>

y_squared = y * y
z = y_squared * 2
print(z)  # grad_fn=<MulBackward0>

# backward() without arguments needs a scalar, so the 3 elements are reduced
# to a single value first; calling it on the unreduced z would raise an error.
z = z.mean()
print(z)  # grad_fn=<MeanBackward0>

# Backpropagation applies the chain rule as a vector-Jacobian product.
# Because z is a scalar, dz/dx comes out with the same shape as x.
z.backward()
print(x.grad)

tensor([-1.5981, -0.8342,  1.3817], requires_grad=True)
tensor([0.4019, 1.1658, 3.3817], grad_fn=<AddBackward0>)
tensor([ 0.3230,  2.7184, 22.8725], grad_fn=<MulBackward0>)
tensor(8.6379, grad_fn=<MeanBackward0>)
tensor([0.5358, 1.5545, 4.5090])


In [12]:
# x must be created with requires_grad=True (the default is False) so that
# autograd tracks it and backward() can store its gradient in x.grad.
x = torch.randn(3, requires_grad=True)
print(x)

y = x + 2
print(y)  # grad_fn=<AddBackward0>

z = y * y * 2
print(z)  # grad_fn=<MulBackward0>

# z is deliberately left unreduced here (no z.mean()), so it still has
# 3 elements and backward() has to be given a vector to multiply with.

# The vector must have the same shape as z.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)

z.backward(gradient=v)
print(x.grad)

tensor([-0.1240, -0.8386,  0.2818], requires_grad=True)
tensor([1.8760, 1.1614, 2.2818], grad_fn=<AddBackward0>)
tensor([ 7.0389,  2.6977, 10.4135], grad_fn=<MulBackward0>)
None
tensor([0.7504, 4.6456, 0.0091])


# Preventing Gradient History

* Three ways to stop PyTorch from creating gradient functions and tracking history in the computational graph

## 1. x.requires_grad_(False)

In [19]:
# Start from a tensor that autograd tracks.
x = torch.randn(3, requires_grad = True)
print(x)

# The trailing underscore marks an in-place method: the flag is changed on x itself.
x.requires_grad_(False) # in-place operation
print(x) # requires_grad=True no longer appears in the printed tensor

tensor([-1.3992, -0.5867,  0.0301], requires_grad=True)
tensor([-1.3992, -0.5867,  0.0301])


## 2. x.detach()

In [21]:
# Start from a tensor that autograd tracks.
x = torch.randn(3, requires_grad = True)
print(x)

# detach() is not in-place; the returned tensor is discarded on this line.
x.detach() # returns a tensor with requires_grad=False
print(x) # x itself still prints with requires_grad=True

# Keep the returned tensor to get a copy of the values without gradient tracking.
y  = x.detach()
print(y)

tensor([ 0.6700, -1.7655,  0.6098], requires_grad=True)
tensor([ 0.6700, -1.7655,  0.6098], requires_grad=True)
tensor([ 0.6700, -1.7655,  0.6098])


## 3. with torch.no_grad() :

In [26]:
x = torch.randn(3, requires_grad=True)
print(x)

# Outside the context manager the addition is recorded, so y carries a grad_fn.
y = x + 2
print(y)

# Inside the with-block gradients are not computed.
with torch.no_grad():
    y = x + 2
    print(y)  # grad_fn=<AddBackward0> does not appear here


tensor([-0.4035,  0.4721,  1.4661], requires_grad=True)
tensor([1.5965, 2.4721, 3.4661], grad_fn=<AddBackward0>)
tensor([1.5965, 2.4721, 3.4661])


# .backward() 주의사항 : (accumulated grad)

In [36]:
weights = torch.ones(4, requires_grad=True)

# backward() adds the new gradients to whatever is already stored in .grad
# instead of overwriting it; accumulation is PyTorch's default behaviour.
# As a result the printed value grows on every pass and is no longer the
# gradient of a single pass.
for epoch in range(3):
    scaled = weights * 3
    model_output = scaled.sum()
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [37]:
weights = torch.ones(4, requires_grad=True)

# To read the correct gradient on every pass, .grad has to be reset to zero
# after each backward() call; otherwise the values keep accumulating.
for epoch in range(3):
    scaled = weights * 3
    model_output = scaled.sum()
    model_output.backward()
    print(weights.grad)

    # In-place reset so the next backward() starts from zero.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
