## 6.1 인공 뉴런

[활성화함수]
- 비선형이다.
- 미분 가능하다. 기울기 계산이 가능해진다. Hardtanh나 ReLU처럼 몇몇 점에서만 미분이 불가능한(그래프가 꺾이는) 경우는 큰 문제가 되지 않는다.

## 6.2 파이토치 nn 모듈

### 6.2.1 forward 대신 `__call__` 사용하기

- nn.Module 인스턴스를 호출하면 `__call__`이 동일한 인자로 forward를 호출한다.
    * forward : 순방향 연산을 수행
    * `__call__` : forward를 호출하기 전후에 몇 가지 중요한 작업(예: 등록된 훅 실행)을 수행

y = model(x)  
y = model.forward(x)  <- 이렇게 하지 말자! (`__call__`이 forward 전후에 수행하는 작업이 생략된다)

In [2]:
import torch
import torch.nn as nn

In [3]:
# Paired temperature readings. The final plot of this notebook labels t_u as
# Fahrenheit (x axis) and t_c as Celsius (y axis).
t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c).unsqueeze(1)  # add a feature axis: reshape to B x Nin (11 x 1)
t_u = torch.tensor(t_u).unsqueeze(1)

# Displayed by the notebook to confirm the (11, 1) shape.
t_u.shape

torch.Size([11, 1])

In [4]:
# Split the samples into training and validation sets by shuffled index.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)  # hold out 20% of the samples (truncated toward zero)

# Random permutation of 0..n_samples-1, so the split changes from run to run.
shuffled_indices = torch.randperm(n_samples)

# NOTE: these slices rely on n_val > 0; with n_val == 0, [:-0] would be empty.
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices

(tensor([5, 6, 2, 7, 9, 3, 0, 4, 1]), tensor([10,  8]))

In [5]:
# Pick the training rows of inputs and targets with the shuffled index tensor.
t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

# Same for the held-out validation rows.
t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

# Scale the inputs by 0.1; the models below are trained on these scaled values.
t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

In [6]:
# A linear module with 1 input feature and 1 output feature. Calling the module
# (not .forward) on the validation inputs yields one prediction per row.
linear_model = nn.Linear(1, 1)
linear_model(t_un_val)

tensor([[-3.0799],
        [-2.3856]], grad_fn=<AddmmBackward0>)

In [7]:
# Inspect the module's two learnable Parameters (both have requires_grad=True).
linear_model.weight, linear_model.bias

(Parameter containing:
 tensor([[-0.3471]], requires_grad=True),
 Parameter containing:
 tensor([-0.7055], requires_grad=True))

### 6.2.2 선형 모델
- nn.Linear 생성자는 세 개의 인자를 받는다.
    * 입력 피처의 수
    * 출력 피처의 수
    * 선형 모델이 편향값을 포함하는지 여부 (기본값 True)

In [8]:
import torch.nn as nn

In [9]:
# Re-creating the module draws fresh initial weight and bias values.
linear_model = nn.Linear(1, 1) # input size, output size
linear_model(t_un_val)

tensor([[6.3460],
        [4.6559]], grad_fn=<AddmmBackward0>)

In [10]:
# Weight Parameter of shape (out_features, in_features) = (1, 1).
linear_model.weight

Parameter containing:
tensor([[0.8450]], requires_grad=True)

In [11]:
# Bias Parameter with one entry per output feature.
linear_model.bias

Parameter containing:
tensor([0.5659], requires_grad=True)

In [12]:
# Call the module on a single input value.
# With an input of 1 the result is simply weight + bias.
x = torch.ones(1)
linear_model(x)

tensor([1.4109], grad_fn=<ViewBackward0>)

In [13]:
# Build a batched input.
# B: batch size, Nin: number of input features -> input tensor of shape B x Nin
# (here 10 x 1); the module returns one output row per input row.
x = torch.ones(10, 1)
linear_model(x)

tensor([[1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109],
        [1.4109]], grad_fn=<AddmmBackward0>)

In [14]:
# Optimizing over batches
import torch.optim as optim

# Fresh linear model; SGD receives the model's parameters and updates them in
# place with a learning rate of 1e-2.
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr = 1e-2)

In [15]:
# parameters() returns a generator, so the notebook shows only the generator object.
linear_model.parameters()

<generator object Module.parameters at 0x000001B543324BA0>

In [16]:
# Materialize the generator to see the actual weight and bias Parameters.
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.0955]], requires_grad=True),
 Parameter containing:
 tensor([0.6270], requires_grad=True)]

In [17]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    """Train ``model`` with full-batch gradient steps and report both losses.

    Every epoch runs one forward pass over the whole training set, one
    backward pass, and one optimizer step. The validation loss is computed
    for monitoring only and never influences the parameter update.

    Args:
        n_epochs: Number of epochs to run; epochs are numbered from 1.
        optimizer: Optimizer built over ``model``'s parameters.
        model: Callable module mapping an input tensor to predictions.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a
            scalar tensor.
        t_u_train: Training inputs.
        t_u_val: Validation inputs.
        t_c_train: Training targets.
        t_c_val: Validation targets.

    Returns:
        None. Progress is printed at epoch 1 and at every 1000th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        # The validation loss is never back-propagated, so disable autograd
        # here instead of building a graph that is thrown away each epoch.
        with torch.no_grad():
            t_p_val = model(t_u_val)
            loss_val = loss_fn(t_p_val, t_c_val)

        # Clear gradients left over from the previous epoch before
        # accumulating the new ones.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [18]:
# Train the linear model for 3000 epochs with mean-squared-error loss, using the
# 0.1-scaled inputs and the unscaled targets.
training_loop(n_epochs=3000, optimizer=optimizer, model=linear_model, loss_fn = nn.MSELoss(),
             t_u_train = t_un_train, t_u_val = t_un_val, t_c_train=t_c_train, t_c_val=t_c_val)

Epoch 1, Training loss 175.9975, Validation loss 238.0677
Epoch 1000, Training loss 3.0792, Validation loss 7.5947
Epoch 2000, Training loss 2.4508, Validation loss 5.7278
Epoch 3000, Training loss 2.4377, Validation loss 5.4851


In [19]:
# Show the weight and bias learned by the run above.
print(linear_model.weight)
print(linear_model.bias)

Parameter containing:
tensor([[5.2241]], requires_grad=True)
Parameter containing:
tensor([-16.4563], requires_grad=True)


## 6.3 신경망

### 6.3.1 선형 모델 대체하기
- model만 다시 정의하면 된다(학습 루프와 손실 함수는 그대로 사용).
1) 선형 모듈 + 활성 함수로 은닉층을 만들고, 그 출력을 출력층(선형 모듈)의 입력으로 넣어 준다.

In [20]:
# Small network: 1 input feature -> 13 hidden features -> tanh -> 1 output.
seq_model = nn.Sequential(
    nn.Linear(1, 13),
    nn.Tanh(),
    nn.Linear(13, 1)) # input size is 13 because the previous Linear outputs 13 features

In [21]:
# Display the module structure; unnamed submodules are indexed 0, 1, 2.
seq_model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

### 6.3.2 파라미터

In [22]:
# Collect every Parameter of the model: weight and bias of both Linear layers
# (Tanh contributes none).
[param for param in seq_model.parameters()]

[Parameter containing:
 tensor([[-0.7463],
         [-0.5751],
         [ 0.2986],
         [ 0.7412],
         [-0.7038],
         [-0.0125],
         [ 0.8626],
         [-0.7534],
         [-0.8630],
         [-0.6267],
         [-0.7755],
         [-0.3714],
         [ 0.8521]], requires_grad=True),
 Parameter containing:
 tensor([-0.8419,  0.6717, -0.7930,  0.4918, -0.0159,  0.0947, -0.4451, -0.7972,
         -0.8252, -0.5592, -0.2213, -0.1688,  0.6178], requires_grad=True),
 Parameter containing:
 tensor([[ 0.1598, -0.1865, -0.1595,  0.2061, -0.0449,  0.1190,  0.2271,  0.1098,
           0.1829, -0.0629,  0.0807, -0.0708,  0.2384]], requires_grad=True),
 Parameter containing:
 tensor([0.0756], requires_grad=True)]

In [23]:
[param.shape for param in seq_model.parameters()]  # shapes of the tensors that will be handed to the optimizer

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

In [24]:
# named_parameters() pairs each Parameter with a name built from the submodule's
# position in the Sequential ("0", "2") and the attribute name.
for name, param in seq_model.named_parameters() :
    print(name, param.shape)

0.weight torch.Size([13, 1])
0.bias torch.Size([13])
2.weight torch.Size([1, 13])
2.bias torch.Size([1])


In [25]:
# Pass each submodule with an explicit name by giving Sequential an OrderedDict;
# the keys become the submodule names.
from collections import OrderedDict
seq_model = nn.Sequential(OrderedDict([
        ('hidden_linear', nn.Linear(1, 8)),
        ('hidden_activation', nn.Tanh()),
        ('output_linear', nn.Linear(8, 1))
]))
seq_model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [26]:
# Parameter names are now prefixed with the submodule names chosen above.
for name, param in seq_model.named_parameters() :
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [27]:
# Named submodules are reachable as attributes of the Sequential.
seq_model.output_linear.bias

Parameter containing:
tensor([0.0610], requires_grad=True)

In [28]:
# New optimizer bound to seq_model's parameters, with a smaller learning rate
# (1e-3) than the one used for the linear model.
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

# Same training loop and loss as before; only the model (and epoch count) changed.
training_loop(n_epochs=5000, optimizer=optimizer, model=seq_model, loss_fn=nn.MSELoss(),
             t_u_train = t_un_train, t_u_val = t_un_val, t_c_train=t_c_train, t_c_val=t_c_val)

Epoch 1, Training loss 165.5347, Validation loss 224.1932
Epoch 1000, Training loss 6.3508, Validation loss 7.2606
Epoch 2000, Training loss 5.0778, Validation loss 6.6529
Epoch 3000, Training loss 2.6067, Validation loss 3.0835
Epoch 4000, Training loss 2.0976, Validation loss 2.4162
Epoch 5000, Training loss 1.9050, Validation loss 1.9567


In [29]:
# Compare the model's predictions on the validation inputs with the true targets.
print('output', seq_model(t_un_val))
print('answer', t_c_val)
print('hidden', seq_model.hidden_linear.weight.grad) # gradient of the hidden Linear layer's weight; this is the value left by backward() in the last epoch of training_loop

output tensor([[20.3467],
        [ 7.8488]], grad_fn=<AddmmBackward0>)
answer tensor([[21.],
        [ 6.]])
hidden tensor([[-0.8176],
        [ 0.8425],
        [-0.8772],
        [-0.7650],
        [-0.1550],
        [-0.0017],
        [-0.0044],
        [-0.0359]])


### 6.3.3 선형 모델과 비교하기

In [30]:
from matplotlib import pyplot as plt

# Evenly spaced inputs 20, 21, ..., 89 as a (70, 1) column, used to draw the
# model's curve across the whole input range.
t_range = torch.arange(20., 90.).unsqueeze(1)
t_range.shape

torch.Size([70, 1])

In [31]:
# Display the full column of range values.
t_range

tensor([[20.],
        [21.],
        [22.],
        [23.],
        [24.],
        [25.],
        [26.],
        [27.],
        [28.],
        [29.],
        [30.],
        [31.],
        [32.],
        [33.],
        [34.],
        [35.],
        [36.],
        [37.],
        [38.],
        [39.],
        [40.],
        [41.],
        [42.],
        [43.],
        [44.],
        [45.],
        [46.],
        [47.],
        [48.],
        [49.],
        [50.],
        [51.],
        [52.],
        [53.],
        [54.],
        [55.],
        [56.],
        [57.],
        [58.],
        [59.],
        [60.],
        [61.],
        [62.],
        [63.],
        [64.],
        [65.],
        [66.],
        [67.],
        [68.],
        [69.],
        [70.],
        [71.],
        [72.],
        [73.],
        [74.],
        [75.],
        [76.],
        [77.],
        [78.],
        [79.],
        [80.],
        [81.],
        [82.],
        [83.],
        [84.],
        [85.],
        [86.],
        [87.],
        [88.],
        [89.]])

In [None]:
# Plot the raw data together with the trained network's predictions.
fig = plt.figure(dpi = 600)
plt.xlabel('화씨')  # axis label text means "Fahrenheit"
plt.ylabel('섭씨')  # axis label text means "Celsius"
# Measured data points (all 11 samples, training and validation).
plt.plot(t_u.numpy(), t_c.numpy(), 'o')
# Model curve over the full input range; inputs are scaled by 0.1 as in training,
# and detach() drops the autograd graph before converting to NumPy.
plt.plot(t_range.numpy(), seq_model(0.1 * t_range).detach().numpy(), 'c-')
# Model predictions at the measured inputs, drawn as black crosses.
plt.plot(t_u.numpy(), seq_model(0.1 * t_u).detach().numpy(), 'kx')
# Author's observation: overfitting

[<matplotlib.lines.Line2D at 0x1b545a6ab50>]