# 네트워크 정의

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  """LeNet-style CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

  The first linear layer expects 16 feature maps of 5x5, which is what the
  two conv/pool stages produce from a 32x32 input
  (32 -> 28 -> 14 -> 10 -> 5).

  Args:
    in_channels: Number of channels in the input image. Defaults to 1.
    num_classes: Length of the output vector per sample. Defaults to 10.
  """

  def __init__(self, in_channels: int = 1, num_classes: int = 10):
    super().__init__()
    # in_channels -> 6 feature maps, 5x5 square convolution
    self.conv1 = nn.Conv2d(in_channels, 6, 5)
    # 6 -> 16 feature maps, 5x5 square convolution
    self.conv2 = nn.Conv2d(6, 16, 5)
    # 16 maps of 5x5 after the second pooling stage, flattened
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, num_classes)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run a (N, in_channels, 32, 32) batch through the network.

    Returns:
      A (N, num_classes) tensor of raw scores; no activation is applied
      after the last linear layer.
    """
    # conv -> ReLU -> 2x2 max pooling, twice
    x = F.max_pool2d(F.relu(self.conv1(x)), 2)
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    # Flatten everything except the batch dimension
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)


# Instantiate the network and print its layer summary.
net = Net()
print(net)


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [2]:
# Materialize the learnable parameters so they can be counted and indexed.
params = list(net.parameters())
print(len(params))
print(params[0].size())  # size of the first parameter tensor in the list

10
torch.Size([6, 1, 5, 5])


In [4]:
# Bare expression: when run as a notebook cell this displays the whole list.
params

[Parameter containing:
 tensor([[[[-0.0521,  0.0699, -0.0806, -0.0048, -0.1841],
           [-0.1394,  0.1506, -0.1495,  0.1483,  0.1180],
           [ 0.0692,  0.1031,  0.0216,  0.0344, -0.1982],
           [-0.1822, -0.0190,  0.0226,  0.1622,  0.0174],
           [-0.0768,  0.1602,  0.1425, -0.1381, -0.1318]]],
 
 
         [[[-0.0118,  0.1269,  0.1820, -0.1061, -0.1000],
           [ 0.0463,  0.1599,  0.1946,  0.1997,  0.0318],
           [ 0.0348,  0.1634, -0.1962,  0.1728,  0.0433],
           [ 0.1578,  0.0138, -0.0359,  0.0896, -0.1692],
           [-0.1727, -0.0601,  0.0923, -0.1570,  0.0112]]],
 
 
         [[[ 0.1131, -0.1769, -0.0813, -0.1729, -0.0422],
           [ 0.1862, -0.1318, -0.0366,  0.0208,  0.0670],
           [-0.0347, -0.1310,  0.1993,  0.1946, -0.0452],
           [-0.1878,  0.0835,  0.1138, -0.1432,  0.1727],
           [ 0.1229,  0.0624, -0.1178,  0.0164, -0.1683]]],
 
 
         [[[ 0.0545,  0.1030, -0.1410,  0.1820, -0.1093],
           [-0.1948, -0.1214, -

In [5]:
# Run one random single-channel 32x32 sample (batch of 1) through the network.
# NOTE(review): `input` shadows the Python builtin; later cells reference this
# name, so any rename has to be applied across the whole notebook at once.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0225, -0.1127,  0.0398, -0.0068,  0.0232, -0.1146,  0.0824,  0.1030,
         -0.0457,  0.0136]], grad_fn=<AddmmBackward0>)


In [6]:
# Reset the gradients of all parameters, then backpropagate a random
# upstream gradient with the same (1, 10) shape as the network output.
net.zero_grad()
out.backward(torch.randn(1, 10))

## 메모
- torch.nn은 미니 배치만 지원
- ex) nn.Conv2d는 nSamples x nChannels x Height x Width 형태의 4D Tensor를 입력으로 받음
- 단일 샘플이 있는 경우 input.unsqueeze(0)을 사용해 가짜 배치 차원 추가

## 요약
- torch.Tensor : backward()와 같은 autograd 연산을 지원하는 다차원 배열. 해당 텐서에 대한 그래디언트도 함께 보관
- nn.Module : 신경망 모듈, 매개변수를 캡슐화하는 편리한 방법, GPU 이동, 내보내기, 로드 도우미
- nn.Parameter : Tensor의 일종으로, Module의 속성으로 할당하면 자동으로 매개변수로 등록됨
- autograd.Function : autograd 연산의 forward와 backward 정의를 구현. 모든 Tensor 연산은 최소 하나의 Function 노드를 생성하며, 이 노드는 해당 Tensor를 만든 함수들과 연결되어 연산 기록을 인코딩함

# Loss Function

In [10]:
# Forward pass, plus a random dummy target for the loss demonstration.
output = net(input)
print(output)
target = torch.randn(10)  # 1-D with 10 elements; output is (1, 10)
target

tensor([[-0.0225, -0.1127,  0.0398, -0.0068,  0.0232, -0.1146,  0.0824,  0.1030,
         -0.0457,  0.0136]], grad_fn=<AddmmBackward0>)


tensor([ 0.7400, -0.5734,  1.4240,  0.8839, -0.0608,  1.1917, -0.9162, -1.2311,
         0.3222, -0.0873])

In [8]:
# Add a leading batch dimension so the target has the same shape as output.
target = target.view(1, -1)
target

tensor([[-0.4923, -1.7824,  0.5558,  1.4320, -0.6019, -0.1789,  0.4556,  2.5686,
         -0.0158,  0.0591]])

In [23]:
# Mean-squared-error criterion applied to the network output and the target.
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(0.8139, grad_fn=<MseLossBackward0>)


  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#       -> flatten -> linear -> relu -> linear -> relu -> linear
#       -> MSELoss
#       -> loss

# Backprop

In [24]:
# Reset the gradient buffers first so the "after" print reflects only the
# gradient produced by this loss.backward() call.
net.zero_grad()

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

# Backpropagate the loss; this populates .grad on the network's parameters.
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0043, -0.0014,  0.0064, -0.0013,  0.0056,  0.0078])


# Update the weights

weight = weight - learning_rate * gradient

In [25]:
# Manual SGD step: weight = weight - learning_rate * gradient.
learning_rate = 0.01
# Update in place under no_grad() instead of going through `.data`: the
# update is not recorded in the autograd graph, and the tensors keep
# autograd's in-place safety checks.
with torch.no_grad():
  for f in net.parameters():
    if f.grad is None:
      # No gradient was computed for this parameter; leave it unchanged
      # (same behavior as optim.SGD).
      continue
    f.sub_(f.grad * learning_rate)

In [26]:
# torch.optim makes it simple to use a ready-made optimizer instead of the
# manual update loop.

import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr = 0.01)

# One training step: clear old gradients, forward, loss, backward, update.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()

  return F.mse_loss(input, target, reduction=self.reduction)
