# Pytorch
---
## Tensors
* Tensorflow의 Tensor와 다르지 않다.
  * Numpy의 ndarray와 매우 유사한 다차원 배열 자료구조이다.
  * Numpy의 ndarrays의 대부분의 operation을 사용할 수 있도록 구성되어 있다.
* Numpy의 operation은 CPU에서만 실행되지만, Tensor는 CUDA를 통해 GPU에서도 연산할 수 있기 때문에 대규모 연산을 더 빠르게 진행할 수 있다.

In [1]:
%matplotlib inline

from matplotlib import pyplot as plt

In [2]:
import torch
torch.cuda.is_available()

True

In [4]:
# Allocate a 5 x 3 float tensor WITHOUT initializing it: the values printed
# below are whatever bytes already occupied the allocated memory.
# torch.empty is the factory function for this; it replaces the legacy
# torch.Tensor(5, 3) constructor call.
x = torch.empty(5, 3)
print(x)
print(x.shape)   # .shape and .size() report the same torch.Size
print(x.size())
print(x.dim())   # number of dimensions

tensor([[ 1.6567e-35,  3.0899e-41,  1.4962e-35],
        [ 3.0899e-41,  1.1210e-43,  0.0000e+00],
        [ 1.5695e-43,  0.0000e+00,  1.6283e-37],
        [ 3.0899e-41, -4.0111e+02,  4.5656e-41],
        [ 8.4077e-39,  3.0899e-41, -4.0676e+02]]) 

torch.Size([5, 3])
torch.Size([5, 3])
2


In [5]:
# Build a 2 x 3 integer tensor directly from a nested Python list.
x = torch.tensor(
    [
        [0, 1, 2],
        [3, 4, 5],
    ]
)
print(x)
print(x.shape)

tensor([[0, 1, 2],
        [3, 4, 5]]) 

torch.Size([2, 3])


In [6]:
# Lay out the integers 0..5 as a 2 x 3 matrix.
x = torch.arange(6).reshape((2, 3))
print(x)
print(x.shape)

tensor([[0, 1, 2],
        [3, 4, 5]]) 

torch.Size([2, 3])


In [7]:
# 5 x 3 matrix of random values -- the torch counterpart of np.random.rand.
x = torch.rand((5, 3))
print(x)
# No backward pass has run, so there is no gradient to show yet.
print(x.grad)

tensor([[0.9341, 0.5818, 0.2001],
        [0.5163, 0.2177, 0.9530],
        [0.4203, 0.3605, 0.0297],
        [0.8837, 0.8632, 0.5059],
        [0.8130, 0.1951, 0.0714]]) 

None


In [8]:
# Allocate a 5 x 3 matrix without initializing it. The contents are NOT random
# samples: they are whatever bytes were already in the allocated memory.
# torch.empty replaces the legacy torch.Tensor(5, 3) constructor call.
x = torch.empty(5, 3)
print(x)

# Construct a randomly initialized matrix
x = torch.rand(5, 3)
print(x)

# Construct a matrix from a nested list
x = torch.tensor([[3, 4, 5], [1, 2, 3]])
print(x)

# Get its size (.size() and .shape report the same torch.Size)
print(x.size())
print(x.shape)

# Get its grad (None here: no backward pass has populated it)
print(x.grad)

tensor([[-3.0935e+02,  4.5656e-41, -3.0935e+02],
        [ 4.5656e-41, -4.2492e+02,  4.5656e-41],
        [-4.2327e+02,  4.5656e-41, -7.3117e+01],
        [ 4.5656e-41,  7.2843e-38,  0.0000e+00],
        [ 8.9683e-44,  0.0000e+00,  1.7937e-43]]) 

tensor([[0.3960, 0.1031, 0.6846],
        [0.1626, 0.9639, 0.3853],
        [0.8824, 0.7609, 0.0777],
        [0.3311, 0.4562, 0.3045],
        [0.0915, 0.8151, 0.8239]]) 

tensor([[3, 4, 5],
        [1, 2, 3]]) 

torch.Size([2, 3])
torch.Size([2, 3])
None


### dtype and device 
 * dtype - Tensor의 데이터 타입
 * device - Tensor의 작업 위치 (cpu or cuda)

In [9]:
# The same values stored with two different dtypes.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float64)
print(x)

y = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.int32)
print(y)

# Mixed-dtype addition: float64 + int32.
print(torch.add(x, y))

tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64) 

tensor([[3, 4, 5],
        [1, 2, 3]], dtype=torch.int32) 

tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [16]:
# Start from a float32 tensor and derive a float64 copy of it.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
print(x)
print(x.dtype)
y = x.to(torch.float64)   # same conversion as x.double()
print(y)
print(torch.add(x, y))    # float32 + float64

tensor([[3., 4., 5.],
        [1., 2., 3.]]) 

torch.float32
tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64)
tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [21]:
# Move a CPU tensor onto the GPU(s). Each hop is skipped when the machine
# cannot make it, so the cell also runs on hosts with fewer than two CUDA
# devices instead of raising.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
print(x.device)
if torch.cuda.is_available():
    x = x.to(torch.device('cuda'))     # no index given: the current CUDA device
    print(x.device)
if torch.cuda.device_count() > 1:
    x = x.to(torch.device('cuda:1'))   # explicit index: the second GPU
    print(x.device)

cpu
cuda:0
cuda:1


In [22]:
# .cuda() and .cpu() are shorthand for moving a tensor between devices.
# The CUDA hop is guarded so the cell does not raise on a CPU-only host.
if torch.cuda.is_available():
    x = x.cuda()
print(x.device)
x = x.cpu()
print(x.device)

cuda:0
cpu


In [29]:
# Pick two target devices, degrading gracefully: two GPUs when present,
# otherwise the single GPU for both roles, otherwise the CPU. On a host with
# two or more GPUs this selects cuda:0 and cuda:1.
device_0 = torch.device('cuda:0') if torch.cuda.device_count() > 0 else torch.device('cpu')
device_1 = torch.device('cuda:1') if torch.cuda.device_count() > 1 else device_0

# Three tensors of the same shape but different dtypes, all created on the CPU.
x = torch.randn(4, 3, dtype=torch.float64)
y = torch.randn(4, 3, dtype=torch.float32)
z = torch.randint(0, 10, (4, 3), dtype=torch.int32)

# Only z is moved ahead of time.
z = z.to(device_1)

print('Before "to" method')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

Before "to" method
torch.float64 cpu
torch.float32 cpu
torch.int32 cuda:1 



In [30]:
print('After "to" method')
# .to() with an explicit dtype and device converts both in a single call
x = x.to(device=device_0, dtype=torch.int32)

# .to() with another tensor as its argument: y is converted to match z
y = y.to(z)
z = z.to('cpu')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

After "to" method
torch.int32 cuda:0
torch.int32 cuda:1
torch.int32 cpu 



### Constructing like Numpy

In [28]:
# NumPy-style factory functions, each producing a 3 x 5 tensor.
x = torch.empty((3, 5))           # allocated only
print(x)

x = torch.zeros((3, 5))           # filled with 0
print(x)

x = torch.ones((3, 5))            # filled with 1
print(x)

x = torch.full((3, 5), 3.1415)    # filled with the given value
print(x)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]) 

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]]) 

tensor([[3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415]]) 



In [31]:
# Integers from 0 up to (not including) 5, in steps of 1.
x = torch.arange(start=0, end=5, step=1)
print(x)

# 9 evenly spaced points from 0 to 5, both ends included.
y = torch.linspace(start=0, end=5, steps=9)
print(y)

# 5 points whose base-10 exponents run evenly from -10 to 10.
z = torch.logspace(start=-10, end=10, steps=5)
print(z)

tensor([0, 1, 2, 3, 4]) 

tensor([0.0000, 0.6250, 1.2500, 1.8750, 2.5000, 3.1250, 3.7500, 4.3750, 5.0000]) 

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10]) 



In [32]:
# 5 x 5 identity matrix I: ones on the main diagonal, zeros elsewhere.
z = torch.eye(n=5)
print(z)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


In [34]:
# 3 x 5 matrix drawn from the uniform distribution on [0, 1)
x = torch.rand((3, 5))
print(x)

# 3 x 5 matrix drawn from the standard normal distribution, Normal(0, 1)
x = torch.randn((3, 5))
print(x)

# 3 x 5 matrix of random integers with low=3 (inclusive) and high=10 (exclusive)
x = torch.randint(low=3, high=10, size=(3, 5))
print(x)

tensor([[0.4583, 0.1945, 0.3125, 0.1438, 0.4175],
        [0.5065, 0.0354, 0.1886, 0.4496, 0.4129],
        [0.3414, 0.8074, 0.5479, 0.5427, 0.3460]])
tensor([[ 0.6518, -0.0999, -1.0731, -1.0218, -1.3907],
        [ 2.1546,  1.6530, -0.7026, -0.5959,  0.2117],
        [ 0.7043, -1.6927, -1.4039,  0.0886,  1.2839]])
tensor([[4, 7, 4, 4, 4],
        [6, 7, 8, 7, 9],
        [9, 9, 7, 7, 8]])


- From numpy to tensor

In [48]:
import numpy as np

# NumPy -> torch, two ways
a = np.ones(5)
b = torch.from_numpy(a)   # keeps the array's float64 dtype
c = torch.Tensor(a)       # comes out as float32
print(b.device)
print(c.dtype)

# torch -> NumPy, two ways
d = b.numpy()
e = np.array(b)
print(type(d))
print("\n", a, "\n", b, "\n", c, "\n", d, "\n", e)

cpu
torch.float32
<class 'numpy.ndarray'>

 [1. 1. 1. 1. 1.] 
 tensor([1., 1., 1., 1., 1.], dtype=torch.float64) 
 tensor([1., 1., 1., 1., 1.]) 
 [1. 1. 1. 1. 1.] 
 [1. 1. 1. 1. 1.]


### Operations
* Operations에도 여러가지 syntax가 있다.

In [49]:
# Two random operands of different shapes: a (5, 3) matrix and a (1, 3) row.
x = torch.rand((5, 3))
y = torch.rand((1, 3))

In [50]:
# Add the two operands; the printed result has one row per row of x.
posco = torch.add(x, y)
print(posco)

tensor([[1.1414, 1.3811, 0.8623],
        [0.4634, 0.4945, 1.1879],
        [0.8353, 1.2414, 0.9865],
        [1.0184, 0.6666, 1.2346],
        [1.0225, 0.9829, 1.4727]])


In [59]:
# Four equivalent ways to add two tensors of the same shape.
x = torch.rand(5, 3)
y = torch.rand(5, 3)
print("solution 1 : ", x + y, '\n')            # operator form

print("solution 2 : ", torch.add(x, y), '\n')  # functional form

# Write the sum into a preallocated buffer. torch.empty replaces the legacy
# torch.Tensor(5, 3) constructor; the buffer's initial contents do not matter
# because add() overwrites every element.
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print("solution 3 : ", result, '\n')

# In-place form (trailing underscore): y itself is overwritten with x + y.
y.add_(x)
print("solution 4 : ", y, '\n')


solution 1 :  tensor([[1.6202, 0.9356, 1.8683],
        [1.0249, 0.5471, 0.6959],
        [1.2044, 1.0520, 0.3116],
        [1.0311, 0.5876, 1.1942],
        [1.1928, 0.1226, 1.0589]]) 

solution 2 :  tensor([[1.6202, 0.9356, 1.8683],
        [1.0249, 0.5471, 0.6959],
        [1.2044, 1.0520, 0.3116],
        [1.0311, 0.5876, 1.1942],
        [1.1928, 0.1226, 1.0589]]) 

solution 3 :  tensor([[1.6202, 0.9356, 1.8683],
        [1.0249, 0.5471, 0.6959],
        [1.2044, 1.0520, 0.3116],
        [1.0311, 0.5876, 1.1942],
        [1.1928, 0.1226, 1.0589]]) 

solution 4 :  tensor([[1.6202, 0.9356, 1.8683],
        [1.0249, 0.5471, 0.6959],
        [1.2044, 1.0520, 0.3116],
        [1.0311, 0.5876, 1.1942],
        [1.1928, 0.1226, 1.0589]]) 



### Same indexing as numpy

In [60]:
# Indexing works in a similar way, too
print(x)
print(x[:, 1], '\n')    # column 1 of every row
print(x.gt(0.5))        # elementwise boolean mask
print(x[x.gt(0.5)])     # the elements selected by that mask

tensor([[0.9463, 0.7832, 0.9431],
        [0.3020, 0.4144, 0.1780],
        [0.3523, 0.6639, 0.2156],
        [0.9345, 0.0160, 0.3808],
        [0.7219, 0.0355, 0.1539]])
tensor([0.7832, 0.4144, 0.6639, 0.0160, 0.0355]) 

tensor([[ True,  True,  True],
        [False, False, False],
        [False,  True, False],
        [ True, False, False],
        [ True, False, False]])
tensor([0.9463, 0.7832, 0.9431, 0.6639, 0.9345, 0.7219])


### Squeeze and Unsqueeze

In [106]:
x = torch.rand((1, 20, 1, 128))
print(x.shape)
# With no dim argument every size-1 dimension is dropped:
# [1, 20, 1, 128] -> [20, 128]
x = torch.squeeze(x)
print(x.shape)

torch.Size([1, 20, 1, 128])
torch.Size([20, 128])


In [107]:
x2 = torch.rand((1, 20, 1, 128))
print(x2.shape)
# Only the requested dimension is dropped: [1, 20, 1, 128] -> [1, 20, 128]
x2 = torch.squeeze(x2, dim=2)
print(x2.shape)

torch.Size([1, 20, 1, 128])
torch.Size([1, 20, 128])


In [110]:
print(x.shape)
# Insert a new size-1 dimension at position 0.
x3 = torch.unsqueeze(x, 0)
print(x3.shape)

torch.Size([20, 128])
torch.Size([1, 20, 128])


### multiplication and concatenation

In [111]:
# Constant matrices of 2s and 3s, multiplied element by element.
x = torch.ones(5, 3) + 1
y = torch.ones(5, 3) + 2
z = torch.mul(x, y)
print(x)
print(y)
print(z)

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])


In [113]:
# Matrix multiplication, as used in
# y = W.T * x + b
z = x.matmul(y.t())
print(x.shape)
print(y.shape)
print(z, z.shape)
# Operator spelling of the same product; .T is the transpose, like .t() above.
w = x @ y.T
w

torch.Size([5, 3])
torch.Size([5, 3])
tensor([[18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.]]) torch.Size([5, 5])


tensor([[18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.],
        [18., 18., 18., 18., 18.]])

In [114]:
print(x)
print(y)
# Join the two tensors along dim=1.
z = torch.cat((x, y), dim=1)
print(z)
print(x.shape)
print(y.shape)
print(z.shape)

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[2., 2., 2., 3., 3., 3.],
        [2., 2., 2., 3., 3., 3.],
        [2., 2., 2., 3., 3., 3.],
        [2., 2., 2., 3., 3., 3.],
        [2., 2., 2., 3., 3., 3.]])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 6])


### 넘파이의 다양한 operation들이 토치에 같은 함수나 변형된 함수로 대부분 탑재 되어있음.

---
## PyTorch의 Autograd: automatic differentiation
* Autograd package는 Tensors가 사용할 수 있는 모든 Operation의 Gradient를 자동으로 계산해준다.
* Tensor의 requires_grad attribute를 이용해 gradient의 계산여부를 결정할 수 있다.
  * 계산이 완료된 이후에 .backward()를 호출하면 자동으로 gradient를 계산한다.
  * .grad attribute를 통해 마찬가지로 gradient에 접근할 수 있다. 
  * .grad_fn attribute를 통해 해당 Variable이 어떻게 생성되었는지 확인할 수 있다. 해당 값으로 해당 노드의 local gradient 구할 수 있게 됨.
  
  

In [147]:
# Create a 2 x 2 tensor of ones with gradient tracking switched on
x = torch.ones((2, 2), requires_grad=True)

print(x)
print(x.requires_grad)
print(x.grad)   # nothing has been back-propagated yet

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
None


In [148]:
# Forward pass: y = x + 2, z = 3 * y^2, out = mean(z)
y = x + 2
print(y)
z = y * y * 3
print(z)
out = z.mean()
print(out)

# Call retain_grad() on each computed tensor before running backward().
for node in (out, z, y):
    node.retain_grad()

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [149]:
# y and z are results of operations, so they have a grad_fn; x does not.
for node in (out, z, y, x):
    print(node.data, node.grad, node.grad_fn)

tensor(27.) None <MeanBackward0 object at 0x7f44bc6d6c10>
tensor([[27., 27.],
        [27., 27.]]) None <MulBackward0 object at 0x7f44bc727ca0>
tensor([[3., 3.],
        [3., 3.]]) None <AddBackward0 object at 0x7f44bc79d1f0>
tensor([[1., 1.],
        [1., 1.]]) None None


In [150]:
# Run back-propagation from out, then show each tensor's value and gradient.
out.backward()

for node in (out, z, y, x):
    print(node.data, node.grad)

tensor(27.) tensor(1.)
tensor([[27., 27.],
        [27., 27.]]) tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
tensor([[3., 3.],
        [3., 3.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[1., 1.],
        [1., 1.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


* 실제로 Gradient 를 계산하면 다음과 같다. <br>
$$\frac{\partial o}{\partial o} = 1 $$

$$o = \frac{1}{4}\sum_{i} z_{i}$$ 

$$\frac{\partial o}{\partial z_{i}} = 0.25 $$

$$z_{i}=3(y_{i})^{2}$$

$$\frac{\partial o}{\partial y_{i}} = 0.25 \cdot \frac{\partial z_{i}}{\partial y_{i}} = 0.25 \cdot 6y_{i} = 1.5 \, y_{i}|_{y_{i}=3} = 4.5 $$

$$y = x + 2  $$

$$\frac{\partial o}{\partial x_{i}}|_{x_{i}=1} = \frac{\partial o}{\partial y_{i}} = 4.5$$

### Gradients 
* out.backward()을 하면 out의 gradient를 1로 시작해 Back-propagation을 시작한다.
* .backward()를 호출한 이후부터는 .grad를 통해 각 변수의 gradient를 구할 수 있다.
* https://teamdable.github.io/techblog/PyTorch-Autograd

In [134]:
import torch

# Scalar chain: x -> y = x^3 -> z = log(y)
x = torch.tensor(5.0)
y = torch.pow(x, 3)
z = y.log()

print('x', x)
print('y', y)
print('z', z)

x tensor(5.)
y tensor(125.)
z tensor(4.8283)


In [151]:
def get_tensor_info(tensor):
  """Return a one-line summary of a tensor's autograd-related attributes.

  The summary lists requires_grad, is_leaf, retains_grad, grad_fn and grad,
  each rendered as ``name(value)``, followed by ``tensor(<str of the tensor>)``.
  An attribute the object does not have is reported as ``None``.
  """
  attrs = ('requires_grad', 'is_leaf', 'retains_grad', 'grad_fn', 'grad')
  parts = ['{}({})'.format(attr, getattr(tensor, attr, None)) for attr in attrs]
  # str() is applied explicitly so the tensor's full printed form is embedded.
  parts.append('tensor(' + str(tensor) + ')')
  return ' '.join(parts)

# Same chain as before, built without requires_grad.
x = torch.tensor(5.0)
y = x ** 3
z = torch.log(y)

for label, t in (('x', x), ('y', y), ('z', z)):
  print(label, get_tensor_info(t))

x requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5.))
y requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(125.))
z requires_grad(False) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(4.8283))


In [152]:
# Rebuild the chain with gradient tracking enabled on x.
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

for label, t in (('x', x), ('y', y), ('z', z)):
  print(label, get_tensor_info(t))

z.backward()

# Report all three tensors again now that backward() has run.
for label, t in (('x_after_backward', x),
                 ('y_after_backward', y),
                 ('z_after_backward', z)):
  print(label, get_tensor_info(t))

x requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f44bc684760>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f44bc6841c0>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f44bc6d6ac0>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f44bc6841c0>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))


In [153]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

for label, t in (('x_before_backward :', x),
                 ('y_before_backward :', y),
                 ('z_before_backward :', z)):
  print(label, get_tensor_info(t))

# This time call retain_grad() on the two computed tensors before backward().
y.retain_grad()
z.retain_grad()
z.backward()

for label, t in (('x_after_backward :', x),
                 ('y_after_backward :', y),
                 ('z_after_backward :', z)):
  print(label, get_tensor_info(t))

x_before_backward : requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y_before_backward : requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f44bc6840d0>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_before_backward : requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f44bc684610>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward : requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward : requires_grad(True) is_leaf(False) retains_grad(True) grad_fn(<PowBackward0 object at 0x7f44bc713760>) grad(0.00800000037997961) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward : requires_grad(True) is_leaf(False) retains_grad(True) grad_fn(<LogBackward0 object at 0x7f44bc684160>) grad(1.0) tensor(tensor(4.8283, grad_fn=<LogBackward0>))

In [154]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

for label, t in (('x', x), ('y', y), ('z', z)):
  print(label, get_tensor_info(t))

# First backward pass; retain_graph=True because backward() is called again below.
z.backward(retain_graph=True)

for label, t in (('x_after_backward', x),
                 ('y_after_backward', y),
                 ('z_after_backward', z)):
  print(label, get_tensor_info(t))

# Second backward pass over the same graph.
z.backward()

for label, t in (('x_after_2backward', x),
                 ('y_after_2backward', y),
                 ('z_after_2backward', z)):
  print(label, get_tensor_info(t))

x requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(None) tensor(tensor(5., requires_grad=True))
y requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f45bcbaadf0>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f44bc6d6910>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_backward requires_grad(True) is_leaf(True) retains_grad(False) grad_fn(None) grad(0.6000000238418579) tensor(tensor(5., requires_grad=True))
y_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<PowBackward0 object at 0x7f450386c730>) grad(None) tensor(tensor(125., grad_fn=<PowBackward0>))
z_after_backward requires_grad(True) is_leaf(False) retains_grad(False) grad_fn(<LogBackward0 object at 0x7f44bc6d6040>) grad(None) tensor(tensor(4.8283, grad_fn=<LogBackward0>))
x_after_2backward requires_grad(True) is_leaf(Tru