In [99]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [100]:
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0,
                    8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
                    33.9, 21.8, 48.4, 60.4, 68.4])
t_un = 0.1 * t_u

In [101]:
def model(t_u, w, b):
    return w * t_u + b

In [102]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [103]:
import torch.optim as optim

dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [104]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

In [105]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()

optimizer.step()

params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [106]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)

optimizer.zero_grad() # <1>
loss.backward()
optimizer.step()

params

tensor([1.7761, 0.1064], requires_grad=True)

In [107]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params) 
        loss = loss_fn(t_p, t_c)
        
        optimizer.zero_grad() # reset grad of param, must do it before backward
        loss.backward()
        optimizer.step() # update params.

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
            
    return params

In [108]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate) # <1>

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params, # <1> 
    t_u = t_un,
    t_c = t_c)

Epoch 500, Loss 7.860120
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [109]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1 # note that bigger learning rate is okay
optimizer = optim.Adam([params], lr=learning_rate) # Adam!!

training_loop(
    n_epochs = 2000, 
    optimizer = optimizer,
    params = params,
    t_u = t_u, # didn't used scaled input, but it works well!
    t_c = t_c)

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928579
Epoch 2000, Loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)

## 과적합 overfitting 문제 다루기
* traning set과 validation set 나누기

In [110]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples) # take 20% of whole data for validation set

shuffled_indices = torch.randperm(n_samples) # random permuation between 0-11

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([ 5,  9,  1, 10,  3,  8,  4,  7,  6]), tensor([2, 0]))

In [111]:
train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [112]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params) # <1>
        train_loss = loss_fn(train_t_p, train_t_c)
                             
        val_t_p = model(val_t_u, *params) # <1>
        val_loss = loss_fn(val_t_p, val_t_c)
        
        optimizer.zero_grad()
        train_loss.backward() # donot use val_loss for calculate backward
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")
            
    return params

In [113]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # <1> 
    val_t_u = val_t_un, # <1> 
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 87.8123, Validation loss 46.8487
Epoch 2, Training loss 36.9974, Validation loss 27.8161
Epoch 3, Training loss 30.0925, Validation loss 29.7801
Epoch 500, Training loss 7.6496, Validation loss 9.7513
Epoch 1000, Training loss 3.9647, Validation loss 4.2404
Epoch 1500, Training loss 3.3378, Validation loss 2.6923
Epoch 2000, Training loss 3.2312, Validation loss 2.1771
Epoch 2500, Training loss 3.2130, Validation loss 1.9856
Epoch 3000, Training loss 3.2099, Validation loss 1.9102


tensor([  5.2384, -16.5813], requires_grad=True)

In [114]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        with torch.no_grad(): # context manager
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            assert val_loss.requires_grad == False # confirm that val_loss tensor graph don't require grad
            
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

In [115]:
def calc_forward(t_u, t_c, is_train):
    with torch.set_grad_enabled(is_train): # if is_train is ture, model's params require grad
                                          # but is_train is false, model cann't do backward (it has no grad) 
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
    return loss

def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c, is_train):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = calc_forward(train_t_p, train_t_c, is_train)

        with torch.no_grad(): # context manager
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            assert val_loss.requires_grad == False # confirm that val_loss tensor graph don't require grad
            
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
    
        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")
            
    return params

In [116]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-3
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params, 
    train_t_u=train_t_un,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
    val_t_u=val_t_un,
    is_train=True)

Epoch 1, Training loss 87.8123, Validation loss 46.8487
Epoch 2, Training loss 59.9732, Validation loss 40.1690
Epoch 3, Training loss 41.3802, Validation loss 35.6225
Epoch 500, Training loss 8.2579, Validation loss 27.1253
Epoch 1000, Training loss 3.6425, Validation loss 26.2438
Epoch 1500, Training loss 3.2356, Validation loss 26.0783
Epoch 2000, Training loss 3.2107, Validation loss 26.0440
Epoch 2500, Training loss 3.2094, Validation loss 26.0363
Epoch 3000, Training loss 3.2093, Validation loss 26.0345


tensor([ 2.2916, -5.0606], requires_grad=True)

### 연습문제
1. 모델을 w2 * t_u ** 2 + w1 * t_u + b로 다시 정의하자.

In [122]:
def model(t_u, w2, w1, b):
    return w2 * t_u ** 2 + w1 * t_u + b

In [124]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params) # <1>
        train_loss = loss_fn(train_t_p, train_t_c)
                             
        val_t_p = model(val_t_u, *params) # <1>
        val_loss = loss_fn(val_t_p, val_t_c)
        
        optimizer.zero_grad()
        train_loss.backward() # donot use val_loss for calculate backward
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")
            
    return params

In [139]:
learning_rate = 1e-3
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)


training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_u, 
    val_t_u = val_t_u,  
    train_t_c = train_t_c,
    val_t_c = val_t_c)

Epoch 1, Training loss 12813444.0000, Validation loss 6741603.0000
Epoch 2, Training loss 12787724.0000, Validation loss 6728070.5000
Epoch 3, Training loss 12762030.0000, Validation loss 6714550.5000
Epoch 500, Training loss 4014033.0000, Validation loss 2111736.2500
Epoch 1000, Training loss 841284.1875, Validation loss 442486.3438
Epoch 1500, Training loss 98510.9453, Validation loss 51773.6719
Epoch 2000, Training loss 5242.1709, Validation loss 2748.7769
Epoch 2500, Training loss 106.0136, Validation loss 59.8613
Epoch 3000, Training loss 5.9844, Validation loss 9.2872


tensor([ 0.0044,  0.0037, -0.9975], requires_grad=True)