### 활성화 함수
- `__init__()` 함수에서는 모델에서 사용될 모듈(`nn.Linear` 등)과 activation function(활성화 함수) 등을 정의함
- `forward()` 함수에서는 실행할 연산을 순서대로 작성하고, 필요한 위치에 활성화 함수를 적용하면 됨
- 주요 활성화 함수
    - 시그모이드 함수 : nn.Sigmoid()
    - ReLU 함수 : nn.ReLU()
    - Leaky ReLU 함수 : nn.LeakyReLU()

In [16]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """A single linear layer whose output is passed through a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer (input_dim -> output_dim) and the sigmoid."""
        super().__init__()
        # Affine map from input_dim features to output_dim features.
        self.linear = nn.Linear(input_dim, output_dim)
        # Sigmoid activation applied to the linear layer's output.
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear(x))."""
        projected = self.linear(x)
        return self.activation(projected)

In [18]:
# Toy data: one all-ones input vector (4 features) and an all-zeros target (3 values).
x = torch.ones(4)
y = torch.zeros(3)
# Size the model from the data so the layer widths always match x and y.
model = LinearRegressionModel(input_dim=x.size(0), output_dim=y.size(0))
# Mean-squared error between prediction and target.
loss_function = nn.MSELoss()

In [19]:
# Hyperparameters for plain stochastic gradient descent.
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# range(nb_epochs + 1) performs nb_epochs + 1 update steps (epoch = 0 .. nb_epochs).
for epoch in range(nb_epochs + 1):
    # Clear the gradients of the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass and loss for the current parameters.
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    # Backpropagate, then apply one SGD update.
    loss.backward()
    optimizer.step()

In [22]:
# `loss` is the value computed in the last loop iteration, i.e. before the
# final optimizer.step(); the parameters printed below are the ones after it.
print(loss)
# Print every learnable tensor of the model in registration order.
for param in model.parameters():
    print(param)

tensor(0.0200, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.4241, -0.0435, -0.2485, -0.6506],
        [-0.1905, -0.5900,  0.1035, -0.6944],
        [-0.5265, -0.2848, -0.6738,  0.0304]], requires_grad=True)
Parameter containing:
tensor([-0.5693, -0.4355, -0.2349], requires_grad=True)


### 다층 레이어 구현
> 먼저 raw level로 직접 구현해본 후, 좀 더 유용한 클래스를 알아보기로 함

- input layer -> hidden layer -> output layer 순으로 순차적으로 작성해주면 됨
    - 내부 행렬곱 조건만 유의해주면 됨

- output layer에는 activation function을 적용하지 않는 것이 일반적임

In [23]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """Four-layer fully connected network with LeakyReLU between layers.

    Three linear layers of width ``hidden_dim`` are each followed by
    ``nn.LeakyReLU(0.1)``; the fourth linear layer maps to ``output_dim`` and
    its result is returned without an activation.

    Args:
        input_dim: Number of features in the last dimension of the input.
        output_dim: Number of features in the last dimension of the output.
        hidden_dim: Width of every hidden layer. Defaults to 10, the value
            that used to be hard-coded, so existing two-argument calls build
            the same architecture as before.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=10):
        super().__init__()
        # Consecutive layers must agree: out_features of one layer is the
        # in_features of the next.
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, hidden_dim)
        self.linear4 = nn.Linear(hidden_dim, output_dim)
        # A single LeakyReLU module (negative slope 0.1) is reused after each
        # hidden layer; it has no parameters, so sharing it is harmless.
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Run ``x`` through the three activated hidden layers and the output layer.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``output_dim``.
        """
        # |x| = (..., input_dim)
        hidden = self.activation(self.linear1(x))  # |hidden| = (..., hidden_dim)
        hidden = self.activation(self.linear2(hidden))  # |hidden| = (..., hidden_dim)
        hidden = self.activation(self.linear3(hidden))  # |hidden| = (..., hidden_dim)
        # The final output is conventionally left without an activation function.
        y = self.linear4(hidden)  # |y| = (..., output_dim)
        return y
        

In [24]:
# Toy data: one all-ones input vector (4 features) and an all-zeros target (3 values).
x = torch.ones(4)
y = torch.zeros(3)
# Size the network from the data so its input/output widths match x and y.
model = LinearRegressionModel(input_dim=x.size(0), output_dim=y.size(0))
# Mean-squared error between prediction and target.
loss_function = nn.MSELoss()

In [25]:
# Hyperparameters for plain stochastic gradient descent.
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# range(nb_epochs + 1) performs nb_epochs + 1 update steps (epoch = 0 .. nb_epochs).
for epoch in range(nb_epochs + 1):
    # Clear the gradients of the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass and loss for the current parameters.
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    # Backpropagate, then apply one SGD update.
    loss.backward()
    optimizer.step()

In [26]:
# `loss` is the value computed in the last loop iteration, i.e. before the
# final optimizer.step(); the parameters printed below are the ones after it.
print(loss)
# Print every learnable tensor of the model in registration order.
for param in model.parameters():
    print(param)

tensor(8.9183e-13, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.2594,  0.2687, -0.1575,  0.0258],
        [-0.0046,  0.1706,  0.0414, -0.2126],
        [-0.4967, -0.3701,  0.3450, -0.2120],
        [ 0.2302, -0.0048,  0.0419, -0.0267],
        [ 0.0569, -0.1314, -0.2341, -0.0464],
        [-0.0558, -0.1660, -0.2447,  0.1743],
        [-0.3571, -0.0055,  0.3971, -0.2283],
        [-0.0469, -0.1339,  0.3037,  0.0028],
        [ 0.5048,  0.2312,  0.0802, -0.2120],
        [ 0.0446, -0.2198,  0.2883,  0.1191]], requires_grad=True)
Parameter containing:
tensor([ 0.2643, -0.2484, -0.2908, -0.0925, -0.2556, -0.4381,  0.0411, -0.0194,
        -0.4093, -0.2954], requires_grad=True)
Parameter containing:
tensor([[ 0.2516, -0.1534, -0.1862, -0.0030,  0.2118,  0.3057,  0.0363,  0.1584,
          0.2502, -0.2606],
        [ 0.2126, -0.1156,  0.0283,  0.0137,  0.0626, -0.0864,  0.0269,  0.2058,
          0.2150,  0.2660],
        [-0.0787, -0.0700,  0.1254, -0.0005, -0.0781,  0.1856

### nn.Sequential

- nn.Sequential 은 순서를 갖는 모듈의 컨테이너를 의미함
- 순차적으로 연산되는 레이어만 있을 경우에는, nn.Sequential을 통해 순서대로 각 레이어를 작성하면 그대로 실행됨
    - 중간에 activation function이 적용된다면, activation function도 순서에 맞게 넣어주면 자동 계산됨

In [27]:
# Length of the first dimension of the input and target tensors; these are
# the values used as the model's input and output widths.
print(x.size(0))
print(y.size(0))

4
3


In [28]:
# Layer widths at both ends are read from the first dimension of the data.
input_dim = x.shape[0]
output_dim = y.shape[0]

# nn.Sequential runs its modules in the order listed: three
# Linear -> LeakyReLU stages of width 10, then a final Linear with no
# activation after it.
model = nn.Sequential(
    nn.Linear(in_features=input_dim, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=output_dim),
)

In [29]:
# Hyperparameters for plain stochastic gradient descent.
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# range(nb_epochs + 1) performs nb_epochs + 1 update steps (epoch = 0 .. nb_epochs).
for epoch in range(nb_epochs + 1):
    # Clear the gradients of the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass and loss for the current parameters.
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    # Backpropagate, then apply one SGD update.
    loss.backward()
    optimizer.step()

# `loss` was computed before the final update; the parameters are printed
# in registration order after it.
print(loss)
for param in model.parameters():
    print(param)

tensor(9.7523e-10, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.2444,  0.4977, -0.4331, -0.2907],
        [ 0.0037,  0.2101,  0.0058, -0.4933],
        [-0.2397,  0.3261, -0.3702, -0.1141],
        [-0.4896,  0.0995,  0.3092,  0.4234],
        [ 0.4409,  0.2386, -0.1198, -0.3903],
        [ 0.1417,  0.1793,  0.4791, -0.1199],
        [ 0.0262,  0.4756, -0.3681,  0.0800],
        [ 0.0783, -0.4238, -0.0289,  0.4479],
        [ 0.2518,  0.0613,  0.4202,  0.1782],
        [-0.4930, -0.4162,  0.4472,  0.3580]], requires_grad=True)
Parameter containing:
tensor([ 0.0204, -0.3141, -0.4616,  0.2718, -0.3736,  0.2561, -0.0242, -0.4492,
        -0.2618,  0.0959], requires_grad=True)
Parameter containing:
tensor([[ 3.0888e-01,  3.5507e-02,  1.3827e-01, -1.6195e-01,  2.1074e-01,
         -6.4053e-02, -7.2814e-02,  2.1355e-01, -6.8119e-02, -8.8605e-02],
        [ 2.5369e-01, -1.2928e-01, -3.9691e-02, -1.5260e-01, -1.8764e-01,
          1.5276e-01, -5.6802e-02, -2.3592e-01,  6.0384e