# PyTorch
---
## Tensors
* Tensorflow의 Tensor와 다르지 않다.
  * Numpy의 ndarray와 매우 유사한 다차원 배열 자료구조이며, ndarray와 서로 변환할 수 있다.
  * Numpy의 ndarrays의 대부분의 operation을 사용할 수 있도록 구성되어 있다.
* Numpy의 operation은 CPU에서만 동작하지만, Tensor는 CUDA를 통해 GPU에서도 연산할 수 있기 때문에 대규모 연산을 더 빠르게 진행할 수 있다.

In [1]:
%matplotlib inline

from matplotlib import pyplot as plt

In [2]:
import torch
# NOTE: this value is not displayed, because the cell ends with a shell command
# rather than with this expression.
torch.cuda.is_available()

# Shell escape: snapshot of GPU memory usage before any tensor is moved to a GPU.
!nvidia-smi

Fri Jun 17 00:54:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN RTX           Off  | 00000000:18:00.0 Off |                  N/A |
| 41%   32C    P8    14W / 280W |      3MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:3B:00.0 Off |                  N/A |
| 40%   32C    P8    16W / 280W |    824MiB / 24220MiB |      0%      Default |
|       

In [3]:
# Allocate 10000 x 100000 random values directly on the GPU.
# Passing device='cuda' avoids first materialising the whole tensor in host
# memory and then copying it over, which is what rand(...).to('cuda') does.
x = torch.rand(10000, 100000, device='cuda')

In [4]:
!nvidia-smi

Fri Jun 17 00:54:26 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN RTX           Off  | 00000000:18:00.0 Off |                  N/A |
| 40%   35C    P2    56W / 280W |   4514MiB / 24220MiB |     22%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:3B:00.0 Off |                  N/A |
| 41%   32C    P8    16W / 280W |    824MiB / 24220MiB |      0%      Default |
|       

In [5]:
# Bare expression that is not the last line of the cell, so its value is not displayed.
x.dtype
# Estimated GPU memory in MiB: 10000 * 100000 = 1e9 elements * 4 bytes, plus 577.
# NOTE(review): assumes x is float32 (4 bytes per element), and 577 looks like a
# fixed per-process overhead chosen to approximate the 4514MiB that nvidia-smi
# reports for GPU 0 — confirm where that constant comes from.
1000000000/1024/1024*4 + 577

4391.697265625

In [6]:
# Allocate a 5 x 3 float tensor WITHOUT initialising it: the printed values are
# whatever happened to be in that memory. torch.empty is the explicit factory
# for this; the legacy torch.Tensor(5, 3) constructor is easy to confuse with
# torch.tensor(data).
x = torch.empty(5, 3)
print(x)
# .shape and .size() report the same torch.Size; .dim() is the number of axes.
print(x.shape)
print(x.size())
print(x.dim())

tensor([[ 1.4013e-45,  0.0000e+00,  1.0825e+38],
        [ 3.0805e-41,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  3.3631e-44],
        [ 4.5622e-41, -1.0658e+11,  4.5622e-41],
        [ 1.4013e-45,  0.0000e+00,  7.0625e-43]])
torch.Size([5, 3])
torch.Size([5, 3])
2


In [7]:
# Build a 2 x 3 integer matrix directly from a nested Python list.
x = torch.tensor(
    [
        [0, 1, 2],
        [3, 4, 5],
    ]
)
print(x)
print(x.shape)

tensor([[0, 1, 2],
        [3, 4, 5]])
torch.Size([2, 3])


In [8]:
# Same 2 x 3 matrix as above, generated from a range and folded into two rows.
x = torch.arange(0, 6).reshape(2, -1)
print(x)
print(x.shape)

tensor([[0, 1, 2],
        [3, 4, 5]])
torch.Size([2, 3])


In [9]:
# Construct a 5 x 3 matrix of random values (PyTorch counterpart of np.random.rand).
x = torch.rand(5, 3) # np.random.rand
print(x)
# No backward pass has been run for this tensor, so .grad prints None.
print(x.grad)

tensor([[0.6128, 0.3909, 0.1892],
        [0.2209, 0.6444, 0.2064],
        [0.8283, 0.5554, 0.7367],
        [0.1333, 0.0658, 0.5253],
        [0.3632, 0.4416, 0.1197]])
None


In [10]:
# Construct a 5 x 3 matrix WITHOUT initialising it. The contents are arbitrary
# leftover memory, not random samples; use torch.rand for actual random values.
# torch.empty is the explicit factory (replaces the legacy torch.Tensor(5, 3)).
x = torch.empty(5, 3)
print(x)

# Construct a randomly initialized matrix
x = torch.rand(5, 3)
print(x)

# Construct a matrix from a nested list; mixing 3.5 with ints yields float32.
x = torch.tensor([[3.5, 4, 5], [1, 2, 3]])
print(x.dtype)

# Get its size (.size() and .shape are equivalent)
print(x.size())
print(x.shape)

# Get its grad (None: no backward pass has populated it)
print(x.grad)

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 1.0844e+38, 3.0805e-41],
        [1.1518e-03, 3.0805e-41, 9.4018e-30],
        [4.5622e-41, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])
tensor([[0.0262, 0.6635, 0.4867],
        [0.9307, 0.9751, 0.4217],
        [0.7110, 0.9057, 0.9989],
        [0.7970, 0.6095, 0.8732],
        [0.0198, 0.3319, 0.6648]])
torch.float32
torch.Size([2, 3])
torch.Size([2, 3])
None


### dtype and device 
 * dtype - Tensor의 데이터 타입
 * device - Tensor의 작업 위치 (cpu or cuda)

In [11]:
# Integer literals stored as 64-bit floats via the dtype argument.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float64)
print(x)

# Same values stored as integers (torch.int is displayed as torch.int32).
y = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.int)
print(y)


# Mixed-dtype addition: the printed result is float64.
print(x + y)

tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64)
tensor([[3, 4, 5],
        [1, 2, 3]], dtype=torch.int32)
tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [12]:
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
print(x)
print(x.dtype)
# .double() returns a float64 version of x; x itself stays float32.
y = x.double()
print(y)
# float32 + float64: the printed result is float64.
print(x+y)

tensor([[3., 4., 5.],
        [1., 2., 3.]])
torch.float32
tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64)
tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [13]:
# A freshly created tensor reports device 'cpu'.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
print(x.device)
# 'cuda' without an index resolved to cuda:0 in the recorded run.
x = x.to(torch.device('cuda'))
print(x.device)
# Explicit index: this line needs a second GPU (the nvidia-smi output lists GPUs 0 and 1).
x = x.to(torch.device('cuda:1'))
print(x.device)

cpu
cuda:0
cuda:1


In [14]:
# Relies on x from the previous cell (left on cuda:1 there).
# .cuda() without an argument placed the tensor on cuda:0 in the recorded run.
x = x.cuda()
print(x.device)
# .cpu() brings the tensor back to host memory.
x = x.cpu()
print(x.device)

cuda:0
cpu


In [15]:
# Pick the second GPU only when it actually exists. torch.cuda.is_available()
# is also True on single-GPU machines, where 'cuda:1' is not a valid device,
# so fall back to cuda:0 and finally to the CPU.
if torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
# Relies on x defined in the previous cell.
x = x.to(device)

In [16]:
# Device handles reused by this cell and the next one; 'cuda:1' needs two GPUs.
device_0 = torch.device('cuda:0')
device_1 = torch.device('cuda:1')

# Three 4 x 3 tensors with different dtypes, all created on the CPU.
x = torch.randn(4, 3, dtype=torch.float64)
y = torch.randn(4, 3, dtype=torch.float32)
z = torch.randint(0, 10, (4, 3), dtype=torch.int32)

# Only z is moved to the second GPU; its dtype is unchanged.
z = z.to(device_1)

print('Before "to" method')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

Before "to" method
torch.float64 cpu
torch.float32 cpu
torch.int32 cuda:1 



In [17]:
# Relies on x, y, z, device_0 from the previous cell.
print('After "to" method')
# .to() with an explicit dtype and device changes both in one call.
x = x.to(dtype=torch.int32, device=device_0)

# .to() with another tensor: in the recorded output y ends up with z's dtype
# (int32) and z's device (cuda:1).
y = y.to(z)
# z is moved back to the CPU only after y has copied its dtype/device.
z = z.to(device='cpu')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

After "to" method
torch.int32 cuda:0
torch.int32 cuda:1
torch.int32 cpu 



### Constructing like Numpy

In [18]:
# Uninitialised 3 x 5 tensor: the printed values are arbitrary.
x = torch.empty(3, 5)
print(x)

# All zeros.
x = torch.zeros(3, 5)
print(x)

# All ones.
x = torch.ones(3, 5)
print(x)

# Every element set to the given fill value.
x = torch.full((3, 5), 3.1415)
print(x)

tensor([[-7.4923e+10,  4.5622e-41, -7.4923e+10,  4.5622e-41,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  1.6217e-19],
        [ 7.7052e+31,  7.2148e+22,  2.5226e-18,  1.0372e-08,  1.0356e-11]])
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415]])


In [19]:
# Two spellings of the same range; the second assignment overwrites the first.
x = torch.arange(0, 5, 1)
x = torch.arange(5)
print(x)

# 9 evenly spaced points from 0 to 5; both end points appear in the output.
y = torch.linspace(0, 5, 9)
print(y)

# 5 points whose exponents run from -10 to 10; the output spans 1e-10 .. 1e+10.
z = torch.logspace(-10, 10, 5)
print(z)

tensor([0, 1, 2, 3, 4])
tensor([0.0000, 0.6250, 1.2500, 1.8750, 2.5000, 3.1250, 3.7500, 4.3750, 5.0000])
tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])


In [20]:
# 5 x 5 identity matrix: ones on the diagonal, zeros elsewhere.
z = torch.eye(5) # I: Identity Matrix
print(z)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


In [21]:
# Construct a 3 x 5 matrix with random values from the uniform distribution, i.e. Uniform[0, 1)
x = torch.rand(3, 5)
print(x)

# Construct a 3 x 5 matrix with random values from the normal distribution, i.e. Normal(0, 1)
x = torch.randn(3, 5)
print(x)

# Construct a 3 x 5 matrix of random integers with low=3 and high=10
# (the recorded sample contains values from 3 to 9).
x = torch.randint(3, 10, (3, 5))
print(x)

tensor([[0.2673, 0.8408, 0.9023, 0.8138, 0.2728],
        [0.0783, 0.4564, 0.1266, 0.1114, 0.2190],
        [0.1715, 0.1478, 0.2119, 0.4236, 0.1932]])
tensor([[ 1.6780, -0.7488, -1.1527, -1.4136, -0.2482],
        [-0.2041,  0.1943,  0.4597, -0.2457, -0.2182],
        [ 0.1025, -0.1163, -1.6372, -1.3694,  0.5092]])
tensor([[8, 5, 9, 7, 7],
        [5, 8, 9, 8, 6],
        [5, 4, 3, 3, 4]])


- From numpy to tensor

In [22]:
import numpy as np
# NumPy array of five ones (printed as floats).
a = np.ones(5)
print(a)
# Two ways to get a tensor from the array; compare their dtypes below.
b = torch.from_numpy(a)
c = torch.Tensor(a)
print(b.device)
print(c.device)
# Recorded output: from_numpy gives float64, torch.Tensor(...) gives float32.
print(b.dtype)
print(c.dtype)
# Two ways back from tensor to ndarray.
d = b.numpy()
e = np.array(b)
print(type(d))
print("\n",a,"\n",b,"\n",c,"\n",d,"\n",e)

[1. 1. 1. 1. 1.]
cpu
cpu
torch.float64
torch.float32
<class 'numpy.ndarray'>

 [1. 1. 1. 1. 1.] 
 tensor([1., 1., 1., 1., 1.], dtype=torch.float64) 
 tensor([1., 1., 1., 1., 1.]) 
 [1. 1. 1. 1. 1.] 
 [1. 1. 1. 1. 1.]


### Operations
* Operations에도 여러가지 syntax가 있다.

In [23]:
# x is 5 x 3 and y is 1 x 3; both are used by the next cell.
x = torch.rand(5, 3)
y = torch.rand(1, 3)
# Adding a Python scalar shifts every element of x by 3.
x = x + 3

In [24]:
# Relies on x (5 x 3) and y (1 x 3) from the previous cell; the printed sum is 5 x 3.
posco = x + y
print(posco)

tensor([[3.9962, 3.8651, 4.7598],
        [4.0251, 4.0541, 4.9596],
        [3.9308, 3.6283, 4.3541],
        [4.1426, 4.3848, 4.4948],
        [3.5530, 4.4659, 4.1304]])


In [25]:
x = torch.rand(5, 3)
y = torch.rand(5, 3)
# Variant 1: operator syntax.
print("solution 1 : ", x + y, '\n')


# Variant 2: functional syntax.
print("solution 2 : ", torch.add(x, y), '\n')


# Variant 3: write the sum into a pre-allocated output buffer.
# torch.empty is the explicit way to allocate an uninitialised buffer
# (replaces the legacy torch.Tensor(5, 3) constructor).
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print("solution 3 : ", result, '\n')

# Variant 4: in-place add (trailing underscore). This MUTATES y: y <- y + x.
y.add_(x)
print("solution 4 : ", y, '\n')

# y was modified above, so this prints x + (x + original y), not the sum shown before.
print(x+y)

solution 1 :  tensor([[0.5726, 1.1181, 1.6126],
        [1.0247, 0.9161, 1.4150],
        [1.3466, 1.2237, 0.8758],
        [0.9720, 1.1149, 0.9213],
        [0.1841, 0.5973, 1.3159]]) 

solution 2 :  tensor([[0.5726, 1.1181, 1.6126],
        [1.0247, 0.9161, 1.4150],
        [1.3466, 1.2237, 0.8758],
        [0.9720, 1.1149, 0.9213],
        [0.1841, 0.5973, 1.3159]]) 

solution 3 :  tensor([[0.5726, 1.1181, 1.6126],
        [1.0247, 0.9161, 1.4150],
        [1.3466, 1.2237, 0.8758],
        [0.9720, 1.1149, 0.9213],
        [0.1841, 0.5973, 1.3159]]) 

solution 4 :  tensor([[0.5726, 1.1181, 1.6126],
        [1.0247, 0.9161, 1.4150],
        [1.3466, 1.2237, 0.8758],
        [0.9720, 1.1149, 0.9213],
        [0.1841, 0.5973, 1.3159]]) 

tensor([[0.8928, 1.4121, 2.4509],
        [1.2309, 1.7260, 1.8501],
        [2.1298, 1.6512, 1.4808],
        [1.2215, 1.3166, 1.8186],
        [0.3602, 0.6176, 1.8764]])


### Same indexing as numpy

In [26]:
# Indexing works much like NumPy. Relies on x from the previous cell.
print(x)
# Indexing a column with a list ([1]) keeps the column axis: the output is 5 x 1.
print(x[:, [1]], '\n')
# Element-wise comparison yields a boolean mask of the same shape ...
print(x>0.5)
# ... and indexing with that mask returns the selected values as a 1-D tensor.
print(x[x > 0.5])

tensor([[0.3203, 0.2940, 0.8383],
        [0.2062, 0.8099, 0.4351],
        [0.7832, 0.4274, 0.6050],
        [0.2496, 0.2018, 0.8973],
        [0.1761, 0.0203, 0.5605]])
tensor([[0.2940],
        [0.8099],
        [0.4274],
        [0.2018],
        [0.0203]]) 

tensor([[False, False,  True],
        [False,  True, False],
        [ True, False,  True],
        [False, False,  True],
        [False, False,  True]])
tensor([0.8383, 0.8099, 0.7832, 0.6050, 0.8973, 0.5605])


### Squeeze and Unsqueeze

In [27]:
x = torch.rand(1, 2, 1, 3)
print(x.shape)
print(x)
# squeeze() without arguments drops every size-1 axis: [1, 2, 1, 3] -> [2, 3]
x = x.squeeze()
print(x.shape)
print(x)

torch.Size([1, 2, 1, 3])
tensor([[[[0.6367, 0.4894, 0.7851]],

         [[0.3169, 0.3582, 0.2569]]]])
torch.Size([2, 3])
tensor([[0.6367, 0.4894, 0.7851],
        [0.3169, 0.3582, 0.2569]])


In [28]:
x2 = torch.rand(1, 20, 1, 128)
print(x2.shape)
# squeeze(dim=2) drops only that axis: [1, 20, 1, 128] -> [1, 20, 128]
x2 = x2.squeeze(dim=2)
print(x2.shape)

torch.Size([1, 20, 1, 128])
torch.Size([1, 20, 128])


In [29]:
# Relies on the squeezed [2, 3] x from two cells above.
print(x.shape)
# unsqueeze(0) inserts a new size-1 axis at position 0: [2, 3] -> [1, 2, 3].
# NOTE: x is reassigned, so re-running this cell adds another leading axis each time.
x = x.unsqueeze(0)
print(x.shape)

torch.Size([2, 3])
torch.Size([1, 2, 3])


### multiplication and concatenation

In [30]:
# x holds 0..14 as float32 laid out in 5 rows of 3; y is a 5 x 3 matrix of threes.
x = torch.arange(15, dtype=torch.float32).reshape(5, 3)
y = 2 + torch.ones(5, 3)
# `*` is the element-wise product (not a matrix product).
z = y * x
print(x)
print(y)
print(z)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[ 0.,  3.,  6.],
        [ 9., 12., 15.],
        [18., 21., 24.],
        [27., 30., 33.],
        [36., 39., 42.]])


In [31]:
## Matrix multiplication. Relies on x and y (both 5 x 3) from the previous cell.
# Same pattern as a linear layer: y = W.T * x + b
# (5 x 3) @ (3 x 5) -> 5 x 5
z= torch.matmul(x, y.t())
print(x.shape)
print(y.shape)
print(z, z.shape)
# The @ operator with .T prints the same 5 x 5 result as matmul with .t() above.
w = x @ y.T
print(w)

torch.Size([5, 3])
torch.Size([5, 3])
tensor([[  9.,   9.,   9.,   9.,   9.],
        [ 36.,  36.,  36.,  36.,  36.],
        [ 63.,  63.,  63.,  63.,  63.],
        [ 90.,  90.,  90.,  90.,  90.],
        [117., 117., 117., 117., 117.]]) torch.Size([5, 5])
tensor([[  9.,   9.,   9.,   9.,   9.],
        [ 36.,  36.,  36.,  36.,  36.],
        [ 63.,  63.,  63.,  63.,  63.],
        [ 90.,  90.,  90.,  90.,  90.],
        [117., 117., 117., 117., 117.]])


In [32]:
# Relies on x and y (both 5 x 3) from the cells above.
print(x)
print(y)
# Concatenating along dim=0 stacks the rows: two 5 x 3 inputs give 10 x 3.
z = torch.cat([x, y], dim=0)
print(z)
print(x.shape)
print(y.shape)
print(z.shape)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([10, 3])


### 넘파이의 다양한 operation들이 토치에 같은 함수나 변형된 함수로 대부분 탑재 되어있음.

---
## PyTorch의 Autograd: automatic differentiation
* Autograd package는 Tensors가 사용할 수 있는 모든 Operation의 Gradient를 자동으로 계산해준다.
* Tensor의 `requires_grad` attribute를 이용해 gradient의 계산여부를 결정할 수 있다.
  * 계산이 완료된 이후에 .backward()를 호출하면 자동으로 gradient를 계산한다.
  * .grad attribute를 통해 마찬가지로 gradient에 접근할 수 있다. 
  * .grad_fn attribute를 통해 해당 Variable이 어떻게 생성되었는지 확인할 수 있다. 해당 값으로 해당 노드의 local gradient 구할 수 있게 됨.
  
  

In [33]:
# Create a tensor that autograd should track (requires_grad=True).
x = torch.ones(2, 2, requires_grad=True)

print(x)
print(x.requires_grad)
# No backward pass has run yet, so .grad is still None.
print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
None


In [34]:
# Relies on x from the previous cell. Each result below prints with a grad_fn
# naming the operation that produced it.
y = x + 2
print(y)
z = y * y * 3
print(z)
out = z.mean()
print(out)

# Ask autograd to keep .grad on these intermediate results as well, so that the
# cell that calls backward() can print their gradients.
out.retain_grad()
z.retain_grad()
y.retain_grad()

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [35]:
# out, z and y are results of operations, so they have a grad_fn; x was created
# directly and has none. All .grad values are None before backward() is called.
print(out.data, out.grad, out.grad_fn)
print(z.data, z.grad, z.grad_fn)
print(y.data, y.grad, y.grad_fn)
print(x.data, x.grad, x.grad_fn)

tensor(27.) None <MeanBackward0 object at 0x7f2d060258b0>
tensor([[27., 27.],
        [27., 27.]]) None <MulBackward0 object at 0x7f2d060254c0>
tensor([[3., 3.],
        [3., 3.]]) None <AddBackward0 object at 0x7f2d060258b0>
tensor([[1., 1.],
        [1., 1.]]) None None


In [36]:
# Back-propagate from the scalar `out`; this fills in .grad for x and for the
# intermediates on which retain_grad() was called earlier.
out.backward()

# Recorded gradients: out -> 1, z -> 0.25, y -> 4.5, x -> 4.5.
print(out.data, out.grad)
print(z.data, z.grad)
print(y.data, y.grad)
print(x.data, x.grad)

tensor(27.) tensor(1.)
tensor([[27., 27.],
        [27., 27.]]) tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
tensor([[3., 3.],
        [3., 3.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[1., 1.],
        [1., 1.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


* 실제로 Gradient 를 계산하면 다음과 같다. <br>
$$\frac{\partial o}{\partial o} = 1 $$

$$o = \frac{1}{4}\sum_{i} z_{i}$$ 

$$\frac{\partial o}{\partial z_{i}} = 0.25 $$

$$z_{i}=3(y_{i})^{2}$$

$$\frac{\partial o}{\partial y_{i}} = 0.25 * \frac{\partial z_{i}}{\partial y_{i}} = 1.5 * y_{i}|_{y_{i}=3} = 4.5 $$

$$y = x + 2  $$

$$\frac{\partial o}{\partial x_{i}}|_{x_{i}=1} = \frac{\partial o}{\partial y_{i}} = 4.5$$

### Gradients 
* out.backward()을 하면 out의 gradient를 1로 시작해 Back-propagation을 시작한다.
* .backward()를 호출한 이후부터는 .grad를 통해 각 변수의 gradient를 구할 수 있다.
* https://teamdable.github.io/techblog/PyTorch-Autograd

In [37]:
# torch is already imported near the top of the notebook; repeated here.
import torch

# Scalar chain: x = 5, y = x^3 = 125, z = log(y).
x = torch.tensor(5.0)
y = x ** 3
z = torch.log(y)

print('x', x)
print('y', y)
print('z', z)

x tensor(5.)
y tensor(125.)
z tensor(4.8283)


In [38]:
def get_tensor_info(tensor):
  """Summarise the autograd-related attributes of ``tensor`` on one line.

  Args:
    tensor: Object to inspect, normally a ``torch.Tensor``. Attributes that
      the object does not have are reported as ``None``.

  Returns:
    str: Space-separated ``name(value)`` entries for ``requires_grad``,
    ``is_leaf``, ``retains_grad``, ``grad_fn`` and ``grad`` (in that order),
    followed by ``tensor(<str(tensor)>)``.
  """
  import warnings  # local import keeps this helper cell self-contained

  info = []
  # Reading ``.grad`` on a non-leaf tensor makes PyTorch emit a UserWarning,
  # which would otherwise leak into the cell output. This helper inspects the
  # attribute on purpose, so silence the warning only while reading.
  with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)
    for name in ['requires_grad', 'is_leaf', 'retains_grad', 'grad_fn', 'grad']:
      info.append(f'{name}({getattr(tensor, name, None)})')
  info.append(f'tensor({str(tensor)})')
  return ' '.join(info)

# Without requires_grad=True nothing is tracked: in the recorded output all three
# tensors report requires_grad(False), is_leaf(True) and grad_fn(None).
x = torch.tensor(5.0)
y = x ** 3
z = torch.log(y)

print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

x requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5.))
y requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(125.))
z requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(4.8283))


In [39]:
# Same chain, but x is tracked: y and z now report is_leaf(False) and carry a grad_fn.
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

# Back-propagate from z. dz/dx = 3 / x = 0.6 at x = 5.
z.backward()

# Recorded output: only the leaf x has a grad (0.6); y and z still show grad(None).
print('x_after_backward', get_tensor_info(x))
print('y_after_backward', get_tensor_info(y))
print('z_after_backward', get_tensor_info(z))

x requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f2d0602a610>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f2d0602a700>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f2da2c6aa00>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f2d0602a8e0>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))


  return self._grad


In [40]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

print('x_before_backward :', get_tensor_info(x))
print('y_before_backward :', get_tensor_info(y))
print('z_before_backward :', get_tensor_info(z))

# retain_grad() must be called before backward(); afterwards the non-leaf
# tensors report retains_grad(True) and keep their gradients.
y.retain_grad()
z.retain_grad()
z.backward()

# Recorded output: x.grad = 0.6, y.grad = 0.008 (= 1 / 125), z.grad = 1.0.
print('x_after_backward :', get_tensor_info(x))
print('y_after_backward :', get_tensor_info(y))
print('z_after_backward :', get_tensor_info(z))

x_before_backward : requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y_before_backward : requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f2d0602a8e0>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_before_backward : requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f2d0602a550>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward : requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward : requires_grad(True) is_leaf(False) retains_grad(True) grad_fn(<PowBackward0 object at 0x7f2d0f3fbd60>) grad(0.00800000037997961) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward : requires_grad(True) is_leaf(False) retains_grad(True) grad_fn(<LogBackward0 object at 0x7f2d0602a640>) grad(1.0) tensor(tensor(4.8283, gr

In [41]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

# retain_graph=True asks autograd to keep the graph after this pass so that
# backward() can be called on z a second time below.
z.backward(retain_graph=True)

# Recorded output after the first pass: x.grad = 0.6.
print('x_after_backward', get_tensor_info(x))
print('y_after_backward', get_tensor_info(y))
print('z_after_backward', get_tensor_info(z))

# Second backward pass over the same graph.
# NOTE(review): x.grad is expected to accumulate (0.6 + 0.6) rather than be
# overwritten, but the recorded output is truncated here — confirm by re-running.
z.backward()

print('x_after_2backward', get_tensor_info(x))
print('y_after_2backward', get_tensor_info(y))
print('z_after_2backward', get_tensor_info(z))

x requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f2d0602ae20>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f2d0602a370>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f2d0602a4f0>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f2d0602a370>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_2backward requires_grad(True) is_leaf(Tru