In [19]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim
# Keep printed tensors compact: two edge items per dimension, 75-column lines.
torch.set_printoptions(linewidth=75, edgeitems=2)

# Paired measurements, stored as column tensors with one row per sample.
# Later cells use t_u as the model input and t_c as the regression target.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
).reshape(-1, 1)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
).reshape(-1, 1)


In [20]:
# Random train/validation split: hold out 20% of the samples (rounded down).
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# NOTE: no seed is set in the visible cells, so the permutation (and therefore
# the split and every number printed below) differs from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice by the training count rather than with a negative index:
# `shuffled_indices[:-n_val]` is empty when n_val == 0, which would silently
# leave no training data and put every sample in the validation set.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

# Scale the inputs by 0.1; these scaled tensors are what the models are fed.
t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

t_un_train, t_un_val

(tensor([[6.0400],
         [5.8200],
         [4.8900],
         [6.8400],
         [4.8400],
         [3.5700],
         [5.6300],
         [5.5900],
         [2.1800]]),
 tensor([[3.3900],
         [8.1900]]))

In [21]:
import torch.nn as nn
# An nn.Linear instance with one input feature and one output feature.
linear_model = nn.Linear(in_features=1, out_features=1)

# Forward pass over the validation inputs; the result is not kept or shown.
linear_model(t_un_val)

# Display the freshly initialised weight and bias.
(linear_model.weight, linear_model.bias)

(Parameter containing:
 tensor([[-0.5830]], requires_grad=True),
 Parameter containing:
 tensor([0.3171], requires_grad=True))

In [22]:
# Every module in nn is written to handle a batch of inputs in one call:
# ten rows of a single feature go in, ten outputs come back.
x = torch.ones((10, 1))
linear_model(x)


tensor([[-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659],
        [-0.2659]], grad_fn=<AddmmBackward0>)

In [23]:
# Rebuild the full data set as column tensors: 11 samples, so shape (11, 1).
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
).reshape(-1, 1)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
).reshape(-1, 1)

# Fresh model with one input feature and one output feature, plus an SGD
# optimizer over its parameters.
linear_model = nn.Linear(in_features=1, out_features=1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

# parameters() walks the module and, recursively, the submodules registered in
# its constructor, yielding every parameter it finds; list() collects them.
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.0444]], requires_grad=True),
 Parameter containing:
 tensor([-0.4086], requires_grad=True)]

In [24]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    """Fit ``model`` with full-batch updates, printing train and validation loss.

    Each epoch runs one forward pass over the whole training set, one backward
    pass, and one optimizer step. The validation loss is computed every epoch
    for reporting only and never contributes to the gradients.

    Args:
        n_epochs: Number of epochs to run (one optimizer step per epoch).
        optimizer: Object providing ``zero_grad()`` and ``step()`` for the
            parameters of ``model``.
        model: Callable mapping an input tensor to predictions.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()`` and ``item()``.
        t_u_train: Training inputs passed to ``model``.
        t_u_val: Validation inputs passed to ``model``.
        t_c_train: Training targets.
        t_c_val: Validation targets.

    Returns:
        None. Losses are printed on epoch 1 and on every 1000th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        # Training forward pass: tracked by autograd so backward() can run.
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        # Validation is only reported, never back-propagated, so skip building
        # an autograd graph for it.
        with torch.no_grad():
            t_p_val = model(t_u_val)
            loss_val = loss_fn(t_p_val, t_c_val)

        # Clear gradients left from the previous epoch before accumulating
        # new ones, then update the parameters.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [25]:
# Train the linear model for 3000 epochs with mean-squared-error loss, using
# the scaled inputs for both the training and the validation split.
training_loop(
    model=linear_model,
    optimizer=optimizer,
    loss_fn=nn.MSELoss(),
    n_epochs=3000,
    t_u_train=t_un_train,
    t_c_train=t_c_train,
    t_u_val=t_un_val,
    t_c_val=t_c_val,
)

# Display the fitted weight and bias.
(linear_model.weight, linear_model.bias)

Epoch 1, Training loss 154.0337, Validation loss 420.2544
Epoch 1000, Training loss 4.3387, Validation loss 10.1400
Epoch 2000, Training loss 2.8633, Validation loss 5.1968
Epoch 3000, Training loss 2.7436, Validation loss 4.7359


(Parameter containing:
 tensor([[5.2465]], requires_grad=True),
 Parameter containing:
 tensor([-17.0517], requires_grad=True))

In [26]:
import torch.nn as nn
# Small network: 1 input feature -> 13 hidden units with tanh -> 1 output.
seq_model2 = nn.Sequential(
    nn.Linear(in_features=1, out_features=13),
    nn.Tanh(),
    nn.Linear(in_features=13, out_features=1),
)
seq_model2


Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

In [27]:
# Print every parameter of seq_model2 with its name, current value and shape.
for name , params in seq_model2.named_parameters():
    print(name , params , params.shape) # values start out randomly initialised

0.weight Parameter containing:
tensor([[-0.3874],
        [-0.7413],
        [-0.8002],
        [-0.2198],
        [ 0.3769],
        [ 0.3937],
        [ 0.6255],
        [ 0.9311],
        [-0.4350],
        [ 0.2844],
        [-0.0528],
        [ 0.8863],
        [-0.5240]], requires_grad=True) torch.Size([13, 1])
0.bias Parameter containing:
tensor([ 0.4774, -0.3199, -0.5489, -0.0896,  0.5627,  0.9168,  0.7239,
         0.5349, -0.7586, -0.1637,  0.9310, -0.7483,  0.1741],
       requires_grad=True) torch.Size([13])
2.weight Parameter containing:
tensor([[-0.0897, -0.2093, -0.2384,  0.0468, -0.2190, -0.2327,  0.1066,
         -0.0152,  0.0748,  0.0870,  0.2360, -0.1212, -0.1435]],
       requires_grad=True) torch.Size([1, 13])
2.bias Parameter containing:
tensor([0.1347], requires_grad=True) torch.Size([1])


In [32]:
from collections import OrderedDict
# Same layout as an unnamed Sequential, but each layer gets a readable name
# that can be used for attribute access (e.g. seq_model.hidden_linear).
seq_model = nn.Sequential(OrderedDict(
    hidden_linear=nn.Linear(1, 10),
    hidden_activation=nn.Tanh(),
    output_linear=nn.Linear(10, 1),
))

seq_model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=10, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=10, out_features=1, bias=True)
)

In [33]:
# Bias and weight of the hidden layer, each followed by its shape.
(
    seq_model.hidden_linear.bias,
    seq_model.hidden_linear.bias.shape,
    seq_model.hidden_linear.weight,
    seq_model.hidden_linear.weight.shape,
)

(Parameter containing:
 tensor([ 0.6921,  0.9266, -0.4149,  0.1226,  0.0189,  0.2917,  0.7501,
          0.2849,  0.1488,  0.6043], requires_grad=True),
 torch.Size([10]),
 Parameter containing:
 tensor([[-0.8984],
         [ 0.6960],
         [-0.3153],
         [-0.0466],
         [-0.0333],
         [-0.4494],
         [ 0.0464],
         [-0.4921],
         [-0.3113],
         [ 0.4602]], requires_grad=True),
 torch.Size([10, 1]))

In [34]:
# Bias and weight of the output layer, each followed by its shape.
(
    seq_model.output_linear.bias,
    seq_model.output_linear.bias.shape,
    seq_model.output_linear.weight,
    seq_model.output_linear.weight.shape,
)

(Parameter containing:
 tensor([0.2412], requires_grad=True),
 torch.Size([1]),
 Parameter containing:
 tensor([[ 0.1916,  0.2202,  0.0426,  0.1614, -0.0426,  0.0640,  0.2522,
           0.2314,  0.1024, -0.1974]], requires_grad=True),
 torch.Size([1, 10]))

In [35]:
# New SGD optimizer bound to seq_model's parameters, with learning rate 1e-3.
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

# Train the two-layer network for 5000 epochs with mean-squared-error loss.
training_loop(
    model=seq_model,
    optimizer=optimizer,
    loss_fn=nn.MSELoss(),
    n_epochs=5000,
    t_u_train=t_un_train,
    t_c_train=t_c_train,
    t_u_val=t_un_val,
    t_c_val=t_c_val,
)

# Compare the trained model's validation predictions with the true targets.
print('output', seq_model(t_un_val))
print('answer', t_c_val)
# Optional: inspect the hidden layer's weight gradients.
# print('hidden', seq_model.hidden_linear.weight.grad)

Epoch 1, Training loss 144.4363, Validation loss 402.2155
Epoch 1000, Training loss 3.9826, Validation loss 41.2819
Epoch 2000, Training loss 2.5164, Validation loss 24.6535
Epoch 3000, Training loss 1.9607, Validation loss 17.5083
Epoch 4000, Training loss 2.5127, Validation loss 10.8832
Epoch 5000, Training loss 1.9951, Validation loss 10.6527
output tensor([[-0.6002],
        [24.0103]], grad_fn=<AddmmBackward0>)
answer tensor([[ 3.],
        [28.]])
