In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [10]:
# Basic flow of a PyTorch model: declare the layers in __init__, use them in forward.

class Net(nn.Module):
    """Three fully connected layers with a ReLU after the first two.

    Args:
        in_features: Size of each input sample. Defaults to 20.
        hidden1: Output width of the first linear layer. Defaults to 30.
        hidden2: Output width of the second linear layer. Defaults to 40.
        out_features: Size of each output sample. Defaults to 10.

    Calling ``Net()`` with no arguments builds the 20 -> 30 -> 40 -> 10 layout.
    """

    # Define the parameters/layers to use (linear, conv net, LSTM, ...).
    def __init__(self, in_features: int = 20, hidden1: int = 30,
                 hidden2: int = 40, out_features: int = 10):
        super().__init__()
        # nn.Linear: a plain fully connected layer.
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, out_features)

    # The part the values actually flow through (computation happens in forward).
    def forward(self, x):
        """Return fc3(relu(fc2(relu(fc1(x))))).

        No activation is applied after the last layer.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
        
        

In [11]:
# Create an instance of the Net model defined above.
net = Net()

In [12]:
# Inspect the model structure (the bare expression displays the module's repr).
net

Net(
  (fc1): Linear(in_features=20, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=40, bias=True)
  (fc3): Linear(in_features=40, out_features=10, bias=True)
)

In [13]:
# Look at the parameters directly.
net.state_dict()

OrderedDict([('fc1.weight',
              tensor([[ 8.7892e-02, -1.1548e-01,  1.6620e-01, -1.7928e-01, -1.3067e-01,
                        6.1698e-02, -1.0608e-01,  1.5505e-01,  1.1229e-01,  4.0509e-02,
                       -2.1093e-02,  1.8888e-01, -2.1567e-01, -4.6398e-02,  1.9795e-02,
                        4.3206e-02,  1.8654e-01,  1.1400e-01,  2.2060e-01,  1.2072e-01],
                      [-1.8755e-01, -1.0279e-01, -2.1753e-01,  1.3323e-01, -4.5611e-02,
                       -5.7762e-02, -5.1718e-02, -1.7470e-01,  4.4402e-02, -3.4107e-02,
                        1.8209e-01, -9.5874e-02,  5.6981e-03, -1.3616e-02, -6.9460e-02,
                        1.8790e-02, -8.9170e-02,  6.5433e-03, -1.5179e-01,  2.7733e-03],
                      [ 6.6114e-02, -1.2940e-02,  1.3772e-01,  1.1761e-01, -1.9706e-01,
                        1.0238e-01, -3.3200e-02,  3.3774e-02,  1.3501e-01,  8.6705e-02,
                        9.0084e-02, -1.7990e-01, -1.7097e-02,  2.2267e-01,  9.9010e-03,
  

In [14]:
# How to check the dimensions of a tensor (used very often).
net.state_dict()['fc2.weight'].shape

torch.Size([40, 30])

In [15]:
# The number in front of 20 (the first dimension) is the batch size.

input_x = torch.randn((1, 20))
input_x

tensor([[-0.0989, -0.7890, -0.3475,  0.7860,  0.3738, -0.0164, -1.1611, -0.6929,
          2.3368, -0.5923,  1.8013, -0.6683, -0.7019,  0.6226, -0.3736,  1.4655,
         -0.2500, -0.0054, -0.2343, -0.2328]])

In [16]:
# The value produced by passing input_x through the network.
out = net(input_x)
out

tensor([[-0.1353,  0.0249,  0.2638, -0.1311, -0.0874, -0.2253, -0.0799,  0.0503,
         -0.1746, -0.0761]], grad_fn=<AddmmBackward>)

In [17]:
# Target tensor: one row of ten ones.
target = torch.ones((1, 10))
target

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [18]:
# Compute gradients and update the weights: one optimisation step.
# torch.optim is available as `optim` from the notebook's import cell.

# Define the optimizer to use.
# The simplest optimizer is Stochastic Gradient Descent;
# Adam, RMSprop, and others are also available.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Reset the gradients.
optimizer.zero_grad()

# Forward pass to get the output.
out = net(input_x)

# Define the loss function.
criterion = nn.MSELoss()
# Compute the loss between out and target.
loss = criterion(out, target)
# .item() extracts the Python float from the scalar loss tensor.
print(f"Loss : {loss.item()}")

# Backpropagate the loss to compute the gradients.
loss.backward()
# Update the weights.
optimizer.step()


Loss : 1.1350626945495605


In [19]:
# Repeat the optimisation step 1000 times, printing the loss on every 10th step.
for step in range(1000):
    # Clear the gradients from the previous step.
    optimizer.zero_grad()
    # Forward pass and loss against the target.
    out = net(input_x)
    loss = criterion(out, target)
    if not step % 10:
        print(loss)
    # Backward pass, then apply the weight update.
    loss.backward()
    optimizer.step()

tensor(1.1207, grad_fn=<MseLossBackward>)
tensor(0.9922, grad_fn=<MseLossBackward>)
tensor(0.8871, grad_fn=<MseLossBackward>)
tensor(0.7851, grad_fn=<MseLossBackward>)
tensor(0.6782, grad_fn=<MseLossBackward>)
tensor(0.5605, grad_fn=<MseLossBackward>)
tensor(0.4343, grad_fn=<MseLossBackward>)
tensor(0.3077, grad_fn=<MseLossBackward>)
tensor(0.1958, grad_fn=<MseLossBackward>)
tensor(0.1123, grad_fn=<MseLossBackward>)
tensor(0.0597, grad_fn=<MseLossBackward>)
tensor(0.0310, grad_fn=<MseLossBackward>)
tensor(0.0165, grad_fn=<MseLossBackward>)
tensor(0.0091, grad_fn=<MseLossBackward>)
tensor(0.0052, grad_fn=<MseLossBackward>)
tensor(0.0031, grad_fn=<MseLossBackward>)
tensor(0.0018, grad_fn=<MseLossBackward>)
tensor(0.0011, grad_fn=<MseLossBackward>)
tensor(0.0007, grad_fn=<MseLossBackward>)
tensor(0.0004, grad_fn=<MseLossBackward>)
tensor(0.0002, grad_fn=<MseLossBackward>)
tensor(0.0001, grad_fn=<MseLossBackward>)
tensor(8.8952e-05, grad_fn=<MseLossBackward>)
tensor(5.4099e-05, grad_fn=<Ms

In [23]:
# Test how a single PyTorch layer processes its input.
fc1 = nn.Linear(in_features=2, out_features=3)
print(fc1.state_dict())
x = torch.rand(1, 2)
x

OrderedDict([('weight', tensor([[ 0.0843, -0.6223],
        [-0.3023, -0.6323],
        [ 0.6392, -0.1082]])), ('bias', tensor([ 0.4784, -0.0134, -0.3593]))])


tensor([[0.2110, 0.6396]])

In [24]:
# Apply the linear layer to x and rebind x to the layer's output.
# NOTE(review): because x is overwritten, this cell is not idempotent — running it
# again would feed the 3-wide output back into fc1, which was built as nn.Linear(2, 3).
x = fc1(x)
x

tensor([[ 0.0981, -0.4817, -0.2936]], grad_fn=<AddmmBackward>)

In [25]:
# Apply ReLU to x and rebind x to the result.
x = F.relu(x)
x

tensor([[0.0981, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)