# 优化器

In [1]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
# Paired samples: element i of t_c is the target for element i of t_u.
# t_u is what gets fed to the model; t_c is what the loss compares against.
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
])
# Copy of the inputs scaled down by a factor of ten; used by the cells that
# train with the larger SGD learning rate.
t_un = 0.1 * t_u

In [3]:
def model(t_u, w, b):
    """Linear model: scale the input by ``w`` and shift it by ``b``.

    Args:
        t_u: Input value(s).
        w: Weight multiplied with the input.
        b: Bias added to the scaled input.

    Returns:
        ``w * t_u + b``.
    """
    scaled = w * t_u
    return scaled + b

In [4]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions and targets.

    Args:
        t_p: Predicted values.
        t_c: Target values the predictions are compared against.

    Returns:
        The mean of the element-wise squared differences ``(t_p - t_c) ** 2``.
    """
    residual = t_p - t_c
    return (residual ** 2).mean()

In [5]:
# The optimizers used below live in the torch.optim submodule.
# NOTE(review): consider moving this import up to the first cell with the others.
import torch.optim as optim

# Show every name the submodule exposes (the optimizer classes among them).
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'lr_scheduler']

In [6]:
# Step size for this first experiment on the raw inputs.
learning_rate = 1e-5
# Single tensor holding both parameters; it is unpacked into (w, b) when
# passed to model(). requires_grad=True makes autograd track it.
params = torch.tensor([1.0, 0.0], requires_grad=True)
# The optimizer receives the list of tensors it is responsible for updating.
optimizer = optim.SGD(params=[params], lr=learning_rate)

In [7]:
# Forward pass on the raw inputs, then backpropagate so that params.grad
# holds the gradient of the loss with respect to params.
t_p  = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()

# Apply one optimizer update to params using the gradient computed above.
optimizer.step()

# Display the updated parameters.
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [8]:
# Repeat the single-step experiment from scratch, this time on the rescaled
# inputs and with a larger learning rate.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

# Clear any stored gradients before computing new ones; this does not
# interact with the forward pass, so it can run first.
optimizer.zero_grad()

t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)
loss.backward()
optimizer.step()

# Display the updated parameters.
params



tensor([1.7761, 0.1064], requires_grad=True)

In [9]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` by repeating forward pass, backward pass and optimizer step.

    Args:
        n_epochs: Number of iterations to run.
        optimizer: Object whose ``zero_grad()`` and ``step()`` are called once
            per iteration. Presumably constructed over ``params`` — the loop
            itself never writes to ``params`` directly.
        params: Tensor unpacked into the ``w`` and ``b`` arguments of ``model``.
        t_u: Inputs passed to ``model``.
        t_c: Targets passed to ``loss_fn``.

    Returns:
        The ``params`` object that was passed in.
    """
    report_every = 500

    for epoch in range(1, n_epochs + 1):
        loss = loss_fn(model(t_u, *params), t_c)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        # Backpropagate: fills in the gradients of the loss.
        loss.backward()
        # Let the optimizer update the parameters from those gradients.
        optimizer.step()

        if epoch % report_every != 0:
            continue

        print('Epoch %d, Loss %f' % (epoch, float(loss)))
        # Current parameter values (printed after this epoch's update).
        print(*params)

    return params

In [10]:
# Run the full training with plain SGD on the rescaled inputs.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
# The optimizer is given the tensor it should update.
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(n_epochs=5000, optimizer=optimizer, params=params, t_u=t_un, t_c=t_c)

Epoch 500, Loss 7.860116
tensor(4.0443, grad_fn=<SelectBackward>) tensor(-9.8133, grad_fn=<SelectBackward>)
Epoch 1000, Loss 3.828538
tensor(4.8021, grad_fn=<SelectBackward>) tensor(-14.1031, grad_fn=<SelectBackward>)
Epoch 1500, Loss 3.092191
tensor(5.1260, grad_fn=<SelectBackward>) tensor(-15.9365, grad_fn=<SelectBackward>)
Epoch 2000, Loss 2.957697
tensor(5.2644, grad_fn=<SelectBackward>) tensor(-16.7200, grad_fn=<SelectBackward>)
Epoch 2500, Loss 2.933134
tensor(5.3236, grad_fn=<SelectBackward>) tensor(-17.0549, grad_fn=<SelectBackward>)
Epoch 3000, Loss 2.928648
tensor(5.3489, grad_fn=<SelectBackward>) tensor(-17.1980, grad_fn=<SelectBackward>)
Epoch 3500, Loss 2.927830
tensor(5.3597, grad_fn=<SelectBackward>) tensor(-17.2591, grad_fn=<SelectBackward>)
Epoch 4000, Loss 2.927679
tensor(5.3643, grad_fn=<SelectBackward>) tensor(-17.2853, grad_fn=<SelectBackward>)
Epoch 4500, Loss 2.927652
tensor(5.3662, grad_fn=<SelectBackward>) tensor(-17.2964, grad_fn=<SelectBackward>)
Epoch 5000, 

tensor([  5.3671, -17.3012], requires_grad=True)

In [11]:
# Same training, but with the Adam optimizer and a larger learning rate.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

# Note that the raw inputs t_u (not the rescaled t_un) are used here.
training_loop(n_epochs=2000, optimizer=optimizer, params=params, t_u=t_u, t_c=t_c)


Epoch 500, Loss 7.612901
tensor(0.4081, grad_fn=<SelectBackward>) tensor(-10.0095, grad_fn=<SelectBackward>)
Epoch 1000, Loss 3.086700
tensor(0.5131, grad_fn=<SelectBackward>) tensor(-15.9629, grad_fn=<SelectBackward>)
Epoch 1500, Loss 2.928578
tensor(0.5350, grad_fn=<SelectBackward>) tensor(-17.2022, grad_fn=<SelectBackward>)
Epoch 2000, Loss 2.927646
tensor(0.5367, grad_fn=<SelectBackward>) tensor(-17.3021, grad_fn=<SelectBackward>)


tensor([  0.5367, -17.3021], requires_grad=True)

In [12]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)  # hold out 20% of the samples (rounded down) for validation
n_train = n_samples - n_val

# Random permutation of the integers 0 .. n_samples - 1.
# NOTE(review): no seed is set in this cell, so the split differs between runs.
shuffled_indices = torch.randperm(n_samples)

# Slice by explicit position instead of a negative offset. With n_val == 0,
# [:-n_val] would be [:0] (an empty training set) and [-n_val:] would be the
# whole permutation; slicing at n_train handles that case correctly and gives
# the same result whenever n_val > 0.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

(tensor([ 7,  3, 10,  4,  8,  2,  0,  9,  1]), tensor([5, 6]))

In [13]:
# Apply the same index sets to inputs and targets so the pairs stay aligned.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Rescaled versions of the inputs (same factor of 0.1 as t_un).
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [14]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train on the training split while reporting the validation loss.

    Args:
        n_epochs: Number of iterations to run.
        optimizer: Object whose ``zero_grad()`` and ``step()`` are called once
            per iteration.
        params: Tensor unpacked into the ``w`` and ``b`` arguments of ``model``.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # Training split.
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)
        # Validation split (evaluated only; never backpropagated).
        val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        optimizer.zero_grad()
        # Only the training loss drives the parameter update.
        train_loss.backward()
        optimizer.step()

        # Report the first three epochs and then every 500th.
        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f}    "
                f"Validation loss {val_loss.item():.4f}"
            )

    return params

In [15]:
# Train with SGD on the rescaled training inputs, monitoring the rescaled
# validation inputs alongside.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000, optimizer=optimizer, params=params,
    train_t_u=train_t_un, val_t_u=val_t_un,
    train_t_c=train_t_c, val_t_c=val_t_c,
)

Epoch 1, Training loss 97.1315    Validation loss 4.9121
Epoch 2, Training loss 39.5736    Validation loss 7.6988
Epoch 3, Training loss 32.7771    Validation loss 15.8679
Epoch 500, Training loss 8.4764    Validation loss 2.6401
Epoch 1000, Training loss 3.9649    Validation loss 1.4434
Epoch 1500, Training loss 3.1063    Validation loss 2.2897
Epoch 2000, Training loss 2.9429    Validation loss 2.9192
Epoch 2500, Training loss 2.9118    Validation loss 3.2435
Epoch 3000, Training loss 2.9059    Validation loss 3.3943


tensor([  5.4996, -18.1371], requires_grad=True)

In [16]:
# Autograd can be switched off for the validation pass.
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train on the training split; evaluate the validation split without autograd.

    The sixth parameter was previously misspelled ``trarin_t_c``. Because of
    that, the body's reference to ``train_t_c`` resolved to a notebook-level
    global instead of the argument, and calling with the ``train_t_c=``
    keyword raised ``TypeError``. It is now spelled ``train_t_c``.

    Args:
        n_epochs: Number of iterations to run.
        optimizer: Object whose ``zero_grad()`` and ``step()`` are called once
            per iteration.
        params: Tensor unpacked into the ``w`` and ``b`` arguments of ``model``.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # No gradient tracking for the validation pass: the validation loss is
        # never backpropagated, so it does not need an autograd graph.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            # Sanity check that tracking really was disabled inside the block.
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params


In [17]:
def calc_forward(t_u, t_c, is_train):
    """Run the forward pass and compute the loss, tracking gradients on demand.

    Reads the notebook-level ``params`` global rather than taking the
    parameters as an argument.

    Args:
        t_u: Inputs passed to ``model``.
        t_c: Targets passed to ``loss_fn``.
        is_train: Passed to ``torch.set_grad_enabled``; when false, the
            forward pass and loss are computed with gradient tracking disabled.

    Returns:
        The loss returned by ``loss_fn``.
    """
    with torch.set_grad_enabled(is_train):
        loss = loss_fn(model(t_u, *params), t_c)
    return loss