# Pytorch
---
## Tensors
* Tensorflow의 Tensor와 다르지 않다.
  * Numpy의 ndarray와 매우 유사한 다차원 배열 자료구조이다.
  * Numpy의 ndarrays의 대부분의 operation을 사용할 수 있도록 구성되어 있다.
* Numpy의 operation은 CPU에서만 실행되지만, Tensor는 CUDA를 통해 GPU에서도 연산할 수 있어 대규모 연산을 훨씬 빠르게 수행할 수 있다.

In [1]:
%matplotlib inline

from matplotlib import pyplot as plt

In [69]:
import torch
# Check whether a CUDA device is usable. The value is not echoed in the
# recorded output because this is not the last expression of the cell.
torch.cuda.is_available()

# Shell escape: show the current GPU status (baseline memory usage).
!nvidia-smi

Mon Jun 13 07:54:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 440.82       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:03:00.0 Off |                  N/A |
| 23%   37C    P8     9W / 250W |    577MiB / 11176MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:04:00.0 Off |                  N/A |
| 29%   36C    P8     9W / 250W |    577MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [70]:
# Allocate a 10000 x 100000 tensor (1e9 elements) directly on the GPU.
# Passing device= avoids first materialising the same multi-gigabyte buffer in
# host memory and then copying it over, which is what rand(...).to('cuda') does.
x = torch.rand(10000, 100000, device='cuda')

In [72]:
# Shell escape: re-check GPU memory usage now that x has been placed on the GPU.
!nvidia-smi

Mon Jun 13 07:55:44 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 440.82       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:03:00.0 Off |                  N/A |
| 23%   43C    P5    13W / 250W |   4393MiB / 11176MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:04:00.0 Off |                  N/A |
| 29%   36C    P8     9W / 250W |    577MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [77]:
# Inspect the element type (not echoed: only the last expression is displayed).
x.dtype
# Expected GPU memory in MiB: 1e9 elements * 4 bytes each (assuming 4-byte
# float32 elements), converted from bytes to MiB, plus the 577 MiB that
# nvidia-smi reported before the allocation.
1000000000/1024/1024*4 + 577

4391.697265625

In [5]:
# Allocate a 5 x 3 tensor WITHOUT initialising it: the printed values are
# whatever happened to be in the allocated memory. torch.empty is the
# documented factory for this and replaces the legacy torch.Tensor(5, 3)
# constructor, which is easy to confuse with torch.tensor(data).
x = torch.empty(5, 3)
print(x)
print(x.shape)   # shape as an attribute
print(x.size())  # same information via a method call
print(x.dim())   # number of dimensions

tensor([[-8.0936e-15,  4.5835e-41, -1.8887e-24],
        [ 3.0620e-41,  1.3563e-19,  1.3569e-19],
        [ 2.3301e-09,  1.3556e-19,  1.5257e-19],
        [ 1.3570e-19,  6.6545e-33,  1.3563e-19],
        [ 1.6114e-19,  3.4543e+30,  1.7864e+25]])
torch.Size([5, 3])
torch.Size([5, 3])
2


In [6]:
# Build a tensor straight from a nested Python list (one inner list per row),
# then show the tensor and its shape.
x = torch.tensor(
    [
        [0, 1, 2],
        [3, 4, 5],
    ]
)
print(x, x.shape, sep='\n')

tensor([[0, 1, 2],
        [3, 4, 5]])
torch.Size([2, 3])


In [7]:
# The same 2 x 3 matrix, produced by laying the range 0..5 out row by row.
x = torch.reshape(torch.arange(6), (2, 3))
print(x, x.shape, sep='\n')

tensor([[0, 1, 2],
        [3, 4, 5]])
torch.Size([2, 3])


In [8]:
# 5 x 3 matrix of random samples (counterpart of np.random.rand), with the
# shape passed as a single tuple.
x = torch.rand((5, 3))
# Show the values and the (still empty) gradient slot.
print(x, x.grad, sep='\n')

tensor([[0.0905, 0.8385, 0.5971],
        [0.7587, 0.5293, 0.5693],
        [0.2580, 0.0111, 0.4075],
        [0.5826, 0.4118, 0.1744],
        [0.3852, 0.4031, 0.2682]])
None


In [12]:
# Construct a 5 x 3 matrix WITHOUT initialising it. The contents are whatever
# bytes were already in the allocated memory -- they are not random samples.
# torch.empty is the documented factory for this; it replaces the legacy
# torch.Tensor(5, 3) constructor, which is easy to confuse with
# torch.tensor(data).
x = torch.empty(5, 3)
print(x)

# Construct a randomly initialized matrix
x = torch.rand(5, 3)
print(x)

# Construct a matrix from a nested list; the printed dtype shows that mixing
# floats and ints in the data yields a float32 tensor
x = torch.tensor([[3.5, 4, 5], [1, 2, 3]])
print(x.dtype)

# Get its size (method and attribute give the same torch.Size)
print(x.size())
print(x.shape)

# Get its grad (None here: nothing has been back-propagated)
print(x.grad)

tensor([[-8.0936e-15,  4.5835e-41, -8.0936e-15],
        [ 4.5835e-41,  5.7651e-01,  7.6007e-01],
        [ 6.9026e-01,  9.1783e-01,  3.2770e-01],
        [ 9.4797e-01,  7.1581e-02,  1.4809e-01],
        [ 5.0616e-01,  2.5177e-01,  9.2551e-01]])
tensor([[0.9047, 0.6213, 0.2686],
        [0.2948, 0.3269, 0.5125],
        [0.1985, 0.4824, 0.0502],
        [0.5937, 0.0558, 0.5935],
        [0.7950, 0.0395, 0.3007]])
torch.float32
torch.Size([2, 3])
torch.Size([2, 3])
None


### dtype and device 
 * dtype - Tensor의 데이터 타입
 * device - Tensor의 작업 위치 (cpu or cuda)

In [13]:
# 64-bit float version of the data
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float64)
print(x)

# 32-bit integer version of the same data
y = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.int32)
print(y)


# Mixed-dtype addition: the printed result is float64
print(torch.add(x, y))

tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64)
tensor([[3, 4, 5],
        [1, 2, 3]], dtype=torch.int32)
tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [14]:
# Start from a float32 tensor and show both its values and its dtype.
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
print(x, x.dtype, sep='\n')
# Up-cast a copy to 64-bit floats.
y = x.to(torch.float64)
print(y)
# float32 + float64: the printed sum is float64.
print(torch.add(x, y))

tensor([[3., 4., 5.],
        [1., 2., 3.]])
torch.float32
tensor([[3., 4., 5.],
        [1., 2., 3.]], dtype=torch.float64)
tensor([[ 6.,  8., 10.],
        [ 2.,  4.,  6.]], dtype=torch.float64)


In [15]:
x = torch.tensor([[3, 4, 5], [1, 2, 3]], dtype=torch.float32)
# No device was requested, so the tensor starts on the CPU.
print(x.device)
# 'cuda' without an index resolves to cuda:0 in the recorded output.
x = x.to(torch.device('cuda'))
print(x.device)
# An explicit index selects a specific GPU; this line needs a second GPU.
x = x.to(torch.device('cuda:1'))
print(x.device)

cpu
cuda:0
cuda:1


In [16]:
# .cuda() / .cpu() move the tensor without spelling out a device object.
# In the recorded output .cuda() lands on cuda:0 even though x was on cuda:1.
x = x.cuda()
print(x.device)
x = x.cpu()
print(x.device)

cuda:0
cpu


In [19]:
# Pick the second GPU only when one actually exists; otherwise fall back to the
# first GPU, and finally to the CPU. Selecting 'cuda:1' whenever *any* GPU is
# available would fail on single-GPU machines.
if torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
x = x.to(device)

In [21]:
device_0 = torch.device('cuda:0')
device_1 = torch.device('cuda:1')

# Three tensors of the same shape but with different dtypes
x = torch.randn(4, 3, dtype=torch.float64)
y = torch.randn(4, 3, dtype=torch.float32)
z = torch.randint(0, 10, (4, 3), dtype=torch.int32)

# Only z is moved to the second GPU; x and y are left where they were created
z = z.to(device_1)

print('Before "to" method')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

Before "to" method
torch.float64 cpu
torch.float32 cpu
torch.int32 cuda:1 



In [22]:
print('After "to" method')
# .to() with an explicit dtype and device: x becomes int32 on cuda:0
x = x.to(dtype=torch.int32, device=device_0)

# .to() with another tensor as argument: in the recorded output y ends up
# with z's dtype and device (int32 on cuda:1)
y = y.to(z)
# Move z back to the CPU; its dtype stays int32
z = z.to(device='cpu')

print(x.dtype, x.device)
print(y.dtype, y.device)
print(z.dtype, z.device, '\n')

After "to" method
torch.int32 cuda:0
torch.int32 cuda:1
torch.int32 cpu 



### Constructing like Numpy

In [25]:
# Four NumPy-style factories, all producing a 3 x 5 tensor, printed in turn:
# uninitialised memory, all zeros, all ones, and a constant fill value.
for x in (
    torch.empty((3, 5)),
    torch.zeros((3, 5)),
    torch.ones((3, 5)),
    torch.full((3, 5), 3.1415),
):
    print(x)

tensor([[1.6880e+25, 2.5226e-18, 6.7062e+22, 5.4073e+22, 2.1298e+23],
        [1.0739e-05, 1.9989e+20, 1.2802e-11, 5.3177e-08, 2.5813e-09],
        [1.6987e+22, 3.1369e+27, 7.0800e+31, 3.1095e-18, 1.8590e+34]])
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415],
        [3.1415, 3.1415, 3.1415, 3.1415, 3.1415]])


In [27]:
# Two spellings of the same range 0..4: the first assignment is immediately
# replaced by the second, shorter form.
x = torch.arange(0, 5, 1)
x = torch.arange(5)
print(x)

# 9 evenly spaced points from 0 to 5, both end points included
y = torch.linspace(0, 5, 9)
print(y)

# 5 points evenly spaced on a log scale, from 10**-10 to 10**10
z = torch.logspace(-10, 10, 5)
print(z)

tensor([0, 1, 2, 3, 4])
tensor([0.0000, 0.6250, 1.2500, 1.8750, 2.5000, 3.1250, 3.7500, 4.3750, 5.0000])
tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])


In [28]:
# 5 x 5 identity matrix: ones on the main diagonal, zeros everywhere else
z = torch.eye(n=5)
print(z)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


In [29]:
# 3 x 5 samples from the uniform distribution on [0, 1)
x = torch.rand((3, 5))
print(x)

# 3 x 5 samples from the standard normal distribution, Normal(0, 1)
x = torch.randn((3, 5))
print(x)

# 3 x 5 random integers with lower bound 3 (inclusive) and upper bound 10 (exclusive)
x = torch.randint(low=3, high=10, size=(3, 5))
print(x)

tensor([[0.2459, 0.1346, 0.5653, 0.6261, 0.5238],
        [0.6581, 0.2211, 0.6217, 0.2938, 0.9698],
        [0.4090, 0.5181, 0.2611, 0.4978, 0.6980]])
tensor([[-0.1675, -0.6749,  1.0925, -0.6593, -0.6047],
        [ 0.2443, -0.2702,  0.4234,  0.2609,  1.5687],
        [-0.8654, -0.8218,  0.3130, -1.2606,  0.2460]])
tensor([[7, 5, 4, 6, 7],
        [9, 3, 9, 3, 6],
        [7, 4, 9, 5, 5]])


- From numpy to tensor

In [35]:
import numpy as np
a = np.ones(5)
print(a)
# Two ways to turn the ndarray into a tensor. As the printed dtypes show,
# from_numpy keeps the array's float64 while the torch.Tensor constructor
# produces float32.
b = torch.from_numpy(a)
c = torch.Tensor(a)
print(b.device)
print(c.device)
print(b.dtype)
print(c.dtype)
# Two ways to go back from a tensor to a NumPy array
d = b.numpy()
e = np.array(b)
print(type(d))
print("\n",a,"\n",b,"\n",c,"\n",d,"\n",e)

[1. 1. 1. 1. 1.]
cpu
cpu
torch.float64
torch.float32
<class 'numpy.ndarray'>

 [1. 1. 1. 1. 1.] 
 tensor([1., 1., 1., 1., 1.], dtype=torch.float64) 
 tensor([1., 1., 1., 1., 1.]) 
 [1. 1. 1. 1. 1.] 
 [1. 1. 1. 1. 1.]


### Operations
* Operations에도 여러가지 syntax가 있다.

In [37]:
# x: 5 x 3 random samples shifted up by 3; y: a single 1 x 3 random row
x = torch.rand(5, 3) + 3
y = torch.rand(1, 3)

In [38]:
# y has shape (1, 3) and x has shape (5, 3): the single row of y is added to
# every row of x (broadcasting), giving a 5 x 3 result.
posco = x + y
print(posco)

tensor([[3.7846, 3.9089, 3.8674],
        [3.8079, 4.3974, 3.9420],
        [4.2804, 4.3342, 4.0926],
        [3.9286, 4.2705, 3.6056],
        [3.6605, 4.1539, 3.4242]])


In [41]:
x = torch.rand(5, 3)
y = torch.rand(5, 3)
# Solution 1: the + operator
print("solution 1 : ", x + y, '\n')


# Solution 2: the functional form
print("solution 2 : ", torch.add(x, y), '\n')


# Solution 3: write the sum into a pre-allocated output tensor. The buffer is
# completely overwritten, so an uninitialised torch.empty is sufficient; it
# replaces the legacy torch.Tensor(5, 3) constructor.
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print("solution 3 : ", result, '\n')

# Solution 4: in-place add (trailing underscore) -- y itself becomes y + x
y.add_(x)
print("solution 4 : ", y, '\n')

# y was modified in place above, so this is x + (x + original y)
print(x+y)

solution 1 :  tensor([[0.6450, 0.5101, 1.1401],
        [0.8112, 1.3695, 0.4760],
        [0.4035, 1.2139, 1.1941],
        [0.7043, 1.2612, 0.8963],
        [1.0410, 0.4389, 1.2423]]) 

solution 2 :  tensor([[0.6450, 0.5101, 1.1401],
        [0.8112, 1.3695, 0.4760],
        [0.4035, 1.2139, 1.1941],
        [0.7043, 1.2612, 0.8963],
        [1.0410, 0.4389, 1.2423]]) 

solution 3 :  tensor([[0.6450, 0.5101, 1.1401],
        [0.8112, 1.3695, 0.4760],
        [0.4035, 1.2139, 1.1941],
        [0.7043, 1.2612, 0.8963],
        [1.0410, 0.4389, 1.2423]]) 

solution 4 :  tensor([[0.6450, 0.5101, 1.1401],
        [0.8112, 1.3695, 0.4760],
        [0.4035, 1.2139, 1.1941],
        [0.7043, 1.2612, 0.8963],
        [1.0410, 0.4389, 1.2423]]) 

tensor([[0.7880, 0.8386, 1.4170],
        [1.4182, 2.3529, 0.4977],
        [0.4567, 1.9745, 1.9693],
        [0.8584, 2.2078, 0.9289],
        [1.1000, 0.8535, 2.0934]])


### Same indexing as numpy

In [43]:
# Indexing works much like NumPy
print(x)
# Selecting column 1 with a list ([1]) keeps that axis: the result is 5 x 1
print(x[:, [1]], '\n')
# An element-wise comparison yields a boolean mask of the same shape ...
print(x>0.5)
# ... and indexing with that mask returns the selected values as a 1-D tensor
print(x[x > 0.5])

tensor([[0.1430, 0.3284, 0.2769],
        [0.6070, 0.9834, 0.0217],
        [0.0532, 0.7606, 0.7753],
        [0.1541, 0.9465, 0.0327],
        [0.0590, 0.4146, 0.8511]])
tensor([[0.3284],
        [0.9834],
        [0.7606],
        [0.9465],
        [0.4146]]) 

tensor([[False, False, False],
        [ True,  True, False],
        [False,  True,  True],
        [False,  True, False],
        [False, False,  True]])
tensor([0.6070, 0.9834, 0.7606, 0.7753, 0.9465, 0.8511])


### Squeeze and Unsqueeze

In [45]:
x = torch.rand(1, 2, 1, 3)
print(x.shape)
print(x)
# squeeze() without arguments removes every size-1 dimension
x = x.squeeze() # [1, 2, 1, 3] -> [2, 3]
print(x.shape)
print(x)

torch.Size([1, 2, 1, 3])
tensor([[[[0.4884, 0.3529, 0.8457]],

         [[0.4126, 0.3411, 0.1079]]]])
torch.Size([2, 3])
tensor([[0.4884, 0.3529, 0.8457],
        [0.4126, 0.3411, 0.1079]])


In [46]:
x2 = torch.rand(1, 20, 1, 128)
print(x2.shape)
# Remove only axis 2; the leading size-1 axis stays:
# [1, 20, 1, 128] -> [1, 20, 128]
x2 = torch.squeeze(x2, dim=2)
print(x2.shape)

torch.Size([1, 20, 1, 128])
torch.Size([1, 20, 128])


In [51]:
print(x.shape)
# unsqueeze(0) inserts a new size-1 axis in front of the existing ones.
# NOTE(review): the recorded output starts at [1, 1, 2, 3], so this cell
# appears to have been run more than once on the same x -- confirm.
x = x.unsqueeze(0)
print(x.shape)

torch.Size([1, 1, 2, 3])
torch.Size([1, 1, 1, 2, 3])


### multiplication and concatenation

In [58]:
# x: the values 0..14 as a 5 x 3 float32 matrix; y: a 5 x 3 matrix of threes
x = torch.arange(15, dtype=torch.float32).reshape(5, 3)
y = torch.full((5, 3), 3.0)
# Element-wise product: each entry of x is multiplied by the matching entry of y
z = torch.mul(x, y)
print(x)
print(y)
print(z)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[ 0.,  3.,  6.],
        [ 9., 12., 15.],
        [18., 21., 24.],
        [27., 30., 33.],
        [36., 39., 42.]])


In [60]:
## matrix multiplication
# (compare the linear form y = W.T * x + b)
# (5, 3) times the transpose of (5, 3) gives a (5, 5) result
z= torch.matmul(x, y.t())
print(x.shape)
print(y.shape)
print(z, z.shape)
# Same product written with the @ operator and the .T attribute
w = x @ y.T
print(w)

torch.Size([5, 3])
torch.Size([5, 3])
tensor([[  9.,   9.,   9.,   9.,   9.],
        [ 36.,  36.,  36.,  36.,  36.],
        [ 63.,  63.,  63.,  63.,  63.],
        [ 90.,  90.,  90.,  90.,  90.],
        [117., 117., 117., 117., 117.]]) torch.Size([5, 5])
tensor([[  9.,   9.,   9.,   9.,   9.],
        [ 36.,  36.,  36.,  36.,  36.],
        [ 63.,  63.,  63.,  63.,  63.],
        [ 90.,  90.,  90.,  90.,  90.],
        [117., 117., 117., 117., 117.]])


In [62]:
print(x)
print(y)
# Concatenate along dim 0 (rows): (5, 3) and (5, 3) become (10, 3)
z = torch.cat([x, y], dim=0)
print(z)
print(x.shape)
print(y.shape)
print(z.shape)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([10, 3])


### 넘파이의 다양한 operation들이 토치에 같은 함수나 변형된 함수로 대부분 탑재 되어있음.

---
## PyTorch의 Autograd: automatic differentiation
* Autograd package는 Tensors가 사용할 수 있는 모든 Operation의 Gradient를 자동으로 계산해준다.
* Tensor의 requires_grad attribute를 이용해 gradient의 계산여부를 결정할 수 있다.
  * 계산이 완료된 이후에 .backward()를 호출하면 자동으로 gradient를 계산한다.
  * .grad attribute를 통해 마찬가지로 gradient에 접근할 수 있다. 
  * .grad_fn attribute를 통해 해당 Variable이 어떻게 생성되었는지 확인할 수 있다. 해당 값으로 해당 노드의 local gradient 구할 수 있게 됨.
  
  

In [63]:
# Create a variable
x = torch.ones(2, 2, requires_grad=True)

print(x)
print(x.requires_grad)
print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
None


In [64]:
# Forward pass: y = x + 2, z = 3 * y^2, out = mean(z)
y = x + 2
print(y)
z = y * y * 3
print(z)
out = z.mean()
print(out)

# Ask autograd to keep .grad on these intermediate results as well, so that
# they can be inspected after backward() has run
out.retain_grad()
z.retain_grad()
y.retain_grad()

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [65]:
# out, z and y are results of operations, so each carries a grad_fn;
# x was created directly and has none. Every .grad is still None because
# backward() has not been called yet.
print(out.data, out.grad, out.grad_fn)
print(z.data, z.grad, z.grad_fn)
print(y.data, y.grad, y.grad_fn)
print(x.data, x.grad, x.grad_fn)

tensor(27.) None <MeanBackward0 object at 0x7fc4e6b7af70>
tensor([[27., 27.],
        [27., 27.]]) None <MulBackward0 object at 0x7fc4e6b7ae20>
tensor([[3., 3.],
        [3., 3.]]) None <AddBackward0 object at 0x7fc4e6b7af70>
tensor([[1., 1.],
        [1., 1.]]) None None


In [66]:
# Back-propagate from the scalar out; afterwards .grad is filled in for x and
# for the intermediates on which retain_grad() was called
out.backward()

print(out.data, out.grad)
print(z.data, z.grad)
print(y.data, y.grad)
print(x.data, x.grad)

tensor(27.) tensor(1.)
tensor([[27., 27.],
        [27., 27.]]) tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
tensor([[3., 3.],
        [3., 3.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[1., 1.],
        [1., 1.]]) tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


* 실제로 Gradient 를 계산하면 다음과 같다. <br>
$$\frac{\partial o}{\partial o} = 1 $$

$$o = \frac{1}{4}\sum_{i} z_{i}$$ 

$$\frac{\partial o}{\partial z_{i}} = 0.25 $$

$$z_{i}=3(y_{i})^{2}$$

$$\frac{\partial o}{\partial y_{i}} = \frac{\partial o}{\partial z_{i}} \cdot \frac{\partial z_{i}}{\partial y_{i}} = 0.25 \cdot 6y_{i} = 1.5\,y_{i}|_{y_{i}=3} = 4.5 $$

$$y = x + 2  $$

$$\frac{\partial o}{\partial x_{i}}|_{x_{i}=1} = \frac{\partial o}{\partial y_{i}} = 4.5$$

### Gradients 
* out.backward()을 하면 out의 gradient를 1로 시작해 Back-propagation을 시작한다.
* .backward()를 호출한 이후부터는 .grad를 통해 각 변수의 gradient를 구할 수 있다.
* https://teamdable.github.io/techblog/PyTorch-Autograd

In [None]:
import torch

# Forward computation only: y = x^3, z = ln(y)
x = torch.tensor(5.0)
y = torch.pow(x, 3)
z = y.log()

# Show each value with its name
print('x', x)
print('y', y)
print('z', z)

In [None]:
def get_tensor_info(tensor):
    """Summarise a tensor's autograd state on a single line.

    Returns a space-separated string of ``name(value)`` entries for
    ``requires_grad``, ``is_leaf``, ``retains_grad``, ``grad_fn`` and ``grad``
    (in that order), followed by ``tensor(<str of the tensor>)``. An attribute
    the object does not have is reported as ``None``.
    """
    import warnings

    info = []
    # Reading .grad on a non-leaf tensor that does not retain its gradient
    # makes PyTorch emit a UserWarning on every access. This helper exists to
    # inspect exactly such tensors, so the warning is silenced for the lookups;
    # the values that are read are unaffected.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        for name in ['requires_grad', 'is_leaf', 'retains_grad', 'grad_fn', 'grad']:
            info.append(f'{name}({getattr(tensor, name, None)})')
    # str() is kept explicit so the tensor's printed form is what gets embedded.
    info.append(f'tensor({str(tensor)})')
    return ' '.join(info)

# Same computation as before, with requires_grad left at its default,
# now reported through get_tensor_info
x = torch.tensor(5.0)
y = x ** 3
z = torch.log(y)

print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

In [None]:
# This time x is created with requires_grad=True
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

# State before back-propagation
print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

# Back-propagate from z
z.backward()

# State after back-propagation, for comparison with the lines printed above
print('x_after_backward', get_tensor_info(x))
print('y_after_backward', get_tensor_info(y))
print('z_after_backward', get_tensor_info(z))

In [None]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

# State before back-propagation
print('x_before_backward :', get_tensor_info(x))
print('y_before_backward :', get_tensor_info(y))
print('z_before_backward :', get_tensor_info(z))

# Unlike the previous cell, retain_grad() is called on the intermediate
# results y and z before back-propagating
y.retain_grad()
z.retain_grad()
z.backward()

# State after back-propagation; compare the grad(...) entries of y and z with
# the previous cell
print('x_after_backward :', get_tensor_info(x))
print('y_after_backward :', get_tensor_info(y))
print('z_after_backward :', get_tensor_info(z))

In [None]:
x = torch.tensor(5.0, requires_grad=True)
y = x ** 3
z = torch.log(y)

# State before back-propagation
print('x', get_tensor_info(x))
print('y', get_tensor_info(y))
print('z', get_tensor_info(z))

# First backward pass, asking autograd to keep the graph so that the second
# z.backward() below can use it again
z.backward(retain_graph=True)

print('x_after_backward', get_tensor_info(x))
print('y_after_backward', get_tensor_info(y))
print('z_after_backward', get_tensor_info(z))

# Second backward pass over the same graph.
# NOTE(review): x.grad is expected to accumulate across the two calls --
# confirm against the printed output.
z.backward()

print('x_after_2backward', get_tensor_info(x))
print('y_after_2backward', get_tensor_info(y))
print('z_after_2backward', get_tensor_info(z))