https://tutorials.pytorch.kr/beginner/deep_learning_60min_blitz.html 
pytorch tutorial 내용을 공부하며 정리했습니다.

In [0]:
from __future__ import print_function
import torch
import numpy as np

## Tensors
Tensor는 NumPy의 ndarray와 유사하며, GPU를 사용해 연산을 가속할 수 있습니다.

In [0]:
x = torch.empty(5,3) ;x

tensor([[5.7124e-36, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 2.8026e-45],
        [0.0000e+00, 1.1210e-44, 0.0000e+00],
        [1.4013e-45, 0.0000e+00, 0.0000e+00]])

In [0]:
x = torch.rand(5,3) ;x

tensor([[0.6385, 0.4690, 0.1798],
        [0.1305, 0.4047, 0.8124],
        [0.4817, 0.3741, 0.1926],
        [0.8437, 0.0204, 0.1638],
        [0.8566, 0.5624, 0.0056]])

In [0]:
x = torch.zeros(5,3,dtype=torch.long) ;x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [0]:
x = x.new_ones(5,3,dtype=torch.double) ;x
x = torch.randn_like(x) ;x

tensor([[ 1.1777, -0.1160,  0.0835],
        [-0.0236,  0.5521,  0.3640],
        [-0.8477, -0.9314,  0.7829],
        [ 0.1074,  0.7407, -1.3487],
        [-1.7297, -1.1135,  0.0645]], dtype=torch.float64)

In [0]:
print(x.size())

torch.Size([5, 3])


In [0]:
# Reinterpret one 4x4 tensor as a flat 16-vector and as rows of 8 elements;
# -1 lets view() infer that dimension from the element count.
x = torch.randn(4, 4)
y, z = x.view(16), x.view(-1, 8)
print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


## AutoGrad
Tensor의 모든 연산에 대해 자동 미분을 제공

In [0]:
x = torch.ones(2,2,requires_grad=True) ;x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [0]:
y = x+2 ;y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [0]:
print(y.grad_fn)

<AddBackward0 object at 0x7f8895153d68>


In [0]:
# Elementwise z = 3 * y^2, then reduce to a scalar with the mean so that
# backward() can later be called on `out` without an explicit gradient argument.
z = y * y * 3
out = z.mean() 
print(z,out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [0]:
out.backward()

In [0]:
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [0]:
# Keep doubling y until its L2 norm reaches 1000; every doubling is recorded
# in the autograd graph, so y still carries a grad_fn afterwards.
x = torch.randn(3, requires_grad=True)
y = x * 2
# detach() gives a graph-free view for the stopping test; unlike the legacy
# `.data` accessor it does not bypass autograd's in-place correctness checks.
while y.detach().norm() < 1000:
    y = y * 2
print(y)

tensor([ 717.7164, 1102.6045,  732.0330], grad_fn=<MulBackward0>)


## Neural Networks

In [0]:
import torch.nn as nn
import torch.nn.functional as F

In [0]:
class Net(nn.Module):
    """Small convolutional network producing 10 output scores.

    Two 3x3 convolutions (1 -> 6 -> 16 channels), each followed by ReLU and
    2x2 max pooling, then three fully connected layers (576 -> 120 -> 84 -> 10).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # For a 32x32 input: 32 -conv-> 30 -pool-> 15 -conv-> 13 -pool-> 6,
        # leaving 16 channels of 6x6 values per sample.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw 10-way scores.

        Args:
            x: image tensor whose per-sample feature map flattens to 576
                values after the two conv/pool stages (e.g. 1x32x32 images).

        Returns:
            A tensor of shape ``(N, 10)`` for a batch of ``N > 1`` samples.
            For a one-sample batch (or unbatched input) the result is the
            1-D shape ``(10,)``, which existing callers rely on.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        if x.dim() == 4 and x.size(0) > 1:
            # Flatten each sample separately; flattening everything would merge
            # the batch into one 576*N vector that fc1 cannot accept.
            x = torch.flatten(x, 1)
        else:
            # Single sample: keep the historical fully-flattened 1-D layout.
            x = torch.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [5]:
# Materialise the learnable tensors of the network, then report how many
# there are and the shape of the first one (the weight of the first layer).
params = list(net.parameters())
print(len(params), params[0].size(), sep="\n")

10
torch.Size([6, 1, 3, 3])


In [6]:
# Feed one random sample through the network and show the resulting scores.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells read it.
input = torch.randn(1,1,32,32) # batch size x channels x rows x cols
out = net(input) 
print(out.shape, out)

torch.Size([10]) tensor([ 0.0784, -0.0616,  0.0028, -0.1220,  0.0737, -0.0568, -0.0487,  0.0606,
         0.1519,  0.0397], grad_fn=<AddBackward0>)


In [0]:
net.zero_grad() # reset the gradients of all network parameters
out.backward(torch.randn(10)) # backpropagate a random gradient for the 10 outputs

## Loss function

In [0]:
output = net(input)
target = torch.randn(10)
criterion = nn.MSELoss()

In [9]:
loss = criterion(output,target)
print(loss)

tensor(0.8623, grad_fn=<MseLossBackward>)


 .grad_fn 속성을 사용하여 loss 를 역방향에서 따라가다보면, 이러한 모습의 연산 그래프를 볼 수 있습니다.

input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> flatten -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss

In [10]:
# Walk backwards through the autograd graph, one node at a time, from the loss.
print(loss.grad_fn)  # MSELoss node
print(loss.grad_fn.next_functions[0][0])  # last linear layer (recorded output: AddBackward0, presumably its bias add)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # one step further back (recorded output: SqueezeBackward3, not a ReLU node)

<MseLossBackward object at 0x7f7788371b70>
<AddBackward0 object at 0x7f7788371ac8>
<SqueezeBackward3 object at 0x7f7788371b70>


## Backprop
역전파 전과 후에 conv1의 bias gradient를 살펴보겠습니다.

In [11]:
# Show conv1's bias gradient three times: right after clearing the gradients,
# after one backward pass, and after a second one. The graph is retained so
# the same loss can be backpropagated twice, which makes the accumulation of
# gradients visible.
net.zero_grad()
for step in range(3):
    if step:
        loss.backward(retain_graph=True)  # backpropagate the error
    print('con1.bias.grad')
    print(net.conv1.bias.grad)

con1.bias.grad
tensor([0., 0., 0., 0., 0., 0.])
con1.bias.grad
tensor([-0.0055,  0.0051, -0.0085,  0.0007,  0.0237, -0.0217])
con1.bias.grad
tensor([-0.0110,  0.0102, -0.0171,  0.0014,  0.0474, -0.0433])


## update weight(optimizer)

In [0]:
import torch.optim as optim

# Plain stochastic gradient descent over every parameter of the network.
optimizer = optim.SGD(net.parameters(), lr = 0.001)

# One complete training step; the order of these calls matters.
optimizer.zero_grad()  # clear gradients left over from earlier backward passes
output = net(input)  # forward pass
loss = criterion(output,target)  # compare the prediction with the target
loss.backward()  # compute gradients of the loss w.r.t. the parameters
optimizer.step()  # apply the update to the parameters