In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
"""
때로는 기존의 모듈을 이어붙인 것보다 더 복잡한 모델을 만들어 사용하고 싶을 때도 있습니다.
이런 경우, nn.Module을 상속받아 서브클래스를 만들고 forward를 정의하여 자신만의 모듈을 만들 수 있습니다.
forward는 입력 Variable을 받아 다른 모듈이나 autograd 연산을 이용하여
출력 Variable을 만드는 역할을 합니다.
"""
class LeNet(nn.Module):
    """LeNet-style convolutional network with ten outputs.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. ``fc1`` expects 16 * 5 * 5 input features,
    which is what the convolution/pooling stack produces for single-channel
    32x32 images (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stage: 1 input channel -> 6 maps -> 16 maps, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected stage: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on a batch and return the raw outputs of ``fc3``.

        Args:
            x: Batch of single-channel images laid out as (N, C, H, W) and
               sized so that 16x5x5 features remain after the second pooling
               step, e.g. (N, 1, 32, 32).

        Returns:
            Tensor of shape (N, 10); no activation is applied to the last layer.
        """
        # First conv block; the pooling window is spelled out as a (2, 2) tuple.
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, (2, 2))

        # Second conv block; a single int is shorthand for a square window.
        activated = F.relu(self.conv2(pooled))
        pooled = F.max_pool2d(activated, 2)

        # Collapse (N, C, H, W) into (N, C*H*W) for the linear layers.
        flat = pooled.view(-1, self.num_flat_features(pooled))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first.

        Gives 1 when ``x`` has no dimensions beyond the batch dimension.
        """
        total = 1
        # Axis 0 is the batch dimension, so start counting from axis 1.
        for axis in range(1, x.dim()):
            total *= x.size(axis)
        return total

In [3]:
# Seed PyTorch's global RNG before the model is built so that the randomly
# initialised weights (and any tensors sampled by later cells) come out the
# same on every Restart & Run All.
torch.manual_seed(0)

net = LeNet()
# Printing a module lists its registered sub-modules and their settings.
print(net)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# Gather every learnable tensor of the network into a list.
params = [p for p in net.parameters()]
# Show how many parameter tensors there are, then the shape of the first one.
print(len(params))
print(params[0].shape)

10
torch.Size([6, 1, 5, 5])


In [5]:
# One random input of shape (1, 1, 32, 32) and one random target of shape
# (1, 10). Plain tensors are used directly: since PyTorch 0.4 `Variable` is
# deprecated and simply returns the tensor it is given, so the wrapper added
# nothing.
X = torch.randn(1, 1, 32, 32)
y = torch.randn(1, 10)

In [7]:
# Hyper-parameters of this toy fit, named so they are easy to find and tune.
learning_rate = 1e-2
num_steps = 500

# Plain SGD over all parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=learning_rate)

for t in range(num_steps):
    # Clear gradients *before* the backward pass: backward() accumulates into
    # .grad, so anything left there by an earlier or out-of-order cell would
    # otherwise be mixed into the first update.
    optimizer.zero_grad()

    # Forward pass and mean-squared error against the target.
    y_pred = net(X)
    loss = F.mse_loss(y_pred, y)

    # Backward pass, then one SGD update of the parameters.
    loss.backward()
    optimizer.step()

In [8]:
# Run the trained network once and reuse that output for both the printout
# and the loss instead of evaluating net(X) twice.
final_pred = net(X)
print(final_pred)
print(y)
print(F.mse_loss(final_pred, y))

tensor([[ 1.3297e+00,  3.7250e-01,  1.5625e+00, -5.8845e-01, -1.5130e-01,
         -2.6408e-01,  1.0245e+00,  2.5273e-02, -2.8397e-04,  6.3965e-01]],
       grad_fn=<AddmmBackward>)
tensor([[ 1.3297e+00,  3.7250e-01,  1.5625e+00, -5.8845e-01, -1.5130e-01,
         -2.6408e-01,  1.0245e+00,  2.5273e-02, -2.8378e-04,  6.3965e-01]])
tensor(4.5630e-14, grad_fn=<MseLossBackward>)
