In [21]:
import torch

# Paired samples used throughout the notebook: t_c holds the target values and
# t_u the raw input readings (11 entries each).
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)
# Rescaled copy of the inputs: one tenth of the raw magnitude.
t_un = t_u * 0.1

## 5.5.1 自动计算梯度

In [22]:
# 1. Applying autograd (automatic differentiation)
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by weight ``w`` and add bias ``b``."""
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

# Parameter vector [w, b]. requires_grad=True makes autograd track operations on
# it, so that a later backward() call can populate params.grad.
params = torch.tensor([1.0, 0.0], requires_grad=True)

In [23]:
# 2. Using the grad attribute
# No backward() call has run on these parameters yet, so no gradient is stored.
params.grad is None

True

In [24]:
# Forward pass on the raw inputs t_u, then backpropagate from the scalar loss.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

# Gradient of the loss with respect to the two entries of params ([w, b]).
params.grad

tensor([4517.2969,   82.6000])

In [25]:
# 3. Gradients accumulate across backward() calls
# Reset the stored gradient in place so the next backward() starts from zero.
if params.grad is not None:
    params.grad.zero_()

In [26]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    """Fit ``params`` with hand-written gradient descent driven by autograd.

    Every epoch clears any stored gradient, runs the forward pass through
    ``model`` and ``loss_fn``, backpropagates the loss and then updates
    ``params`` in place. The current loss is printed every 500 epochs.

    Args:
        n_epochs: Number of update steps to perform.
        learning_rate: Step size multiplying the gradient.
        params: Tensor unpacked into the model arguments after ``t_u``;
            it must have been created with ``requires_grad=True``.
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.

    Returns:
        The same ``params`` tensor that was passed in, after the updates.
    """
    for epoch in range(1, n_epochs + 1):
        # Drop whatever gradient the previous iteration left behind.
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The parameter update itself must not be recorded by autograd.
        with torch.no_grad():
            params -= learning_rate * params.grad

        if not epoch % 500:
            print(f"Epoch {epoch}, Loss {loss}")

    return params

In [27]:
# Run 5000 epochs of the manual gradient-descent loop on the rescaled inputs,
# starting from params = [1.0, 0.0].
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params=torch.tensor([1.0, 0.0], requires_grad=True),
    t_u=t_un,
    t_c=t_c,
)

Epoch 500, Loss 7.860115051269531
Epoch 1000, Loss 3.828537940979004
Epoch 1500, Loss 3.092191219329834
Epoch 2000, Loss 2.957697868347168
Epoch 2500, Loss 2.933133840560913
Epoch 3000, Loss 2.9286484718322754
Epoch 3500, Loss 2.9278297424316406
Epoch 4000, Loss 2.9276793003082275
Epoch 4500, Loss 2.927651882171631
Epoch 5000, Loss 2.9276468753814697


tensor([  5.3671, -17.3012], requires_grad=True)

## 5.5.2 优化器

In [28]:
import torch.optim as optim

In [29]:
# List the names exposed by the torch.optim module to see which optimizers exist.
dir(optim)

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_adafactor',
 '_functional',
 'lr_scheduler',
 'swa_utils']

In [30]:
# Fresh parameters, registered with an SGD optimizer that will update them.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

In [31]:
# One optimization step by hand. Note: this uses the raw t_u, not the rescaled t_un.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

# Clear old gradients, backpropagate, then let the optimizer update params.
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Display the parameters after the single update.
params

tensor([-44.1730,  -0.8260], requires_grad=True)

In [35]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` by delegating the update step to ``optimizer``.

    Every epoch runs the forward pass through ``model`` and ``loss_fn``,
    clears the gradients held by the optimizer, backpropagates the loss and
    calls ``optimizer.step()``. The loss is printed every 500 epochs.

    Args:
        n_epochs: Number of update steps to perform.
        optimizer: Optimizer object exposing ``zero_grad()`` and ``step()``;
            it is expected to have been built over ``params``.
        params: Tensor unpacked into the model arguments after ``t_u``.
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        loss = loss_fn(model(t_u, *params), t_c)

        # Gradients must be cleared before backward() so they do not add up.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print(f"Epoch: {epoch}, Loss: {loss}")

    return params

In [40]:
# Restart from the initial parameters and train with SGD on the rescaled inputs.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], learning_rate)  # learning rate passed positionally

training_loop(5000, optimizer, params, t_un, t_c)

Epoch: 500, Loss: 57.37331771850586
Epoch: 1000, Loss: 44.78495788574219
Epoch: 1500, Loss: 37.90142059326172
Epoch: 2000, Loss: 34.128047943115234
Epoch: 2500, Loss: 32.05025100708008
Epoch: 3000, Loss: 30.896940231323242
Epoch: 3500, Loss: 30.24767303466797
Epoch: 4000, Loss: 29.87321662902832
Epoch: 4500, Loss: 29.648618698120117
Epoch: 5000, Loss: 29.505748748779297


tensor([2.2361, 0.0655], requires_grad=True)

In [42]:
# Try a different optimizer
# Note: Adam is run on the raw t_u here (not the rescaled t_un), with a larger
# learning rate and fewer epochs than the SGD run.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer=optim.Adam([params], learning_rate)
training_loop(2000, optimizer, params, t_u, t_c)

Epoch: 500, Loss: 7.6128997802734375
Epoch: 1000, Loss: 3.086698293685913
Epoch: 1500, Loss: 2.9285776615142822
Epoch: 2000, Loss: 2.9276463985443115


tensor([  0.5367, -17.3021], requires_grad=True)

## 5.5.3 训练、验证和过拟合

In [61]:
# 1. Evaluate the training loss
# 2. Generalize to a validation set
# 3. Split the dataset
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)  # hold out 20% of the samples, rounded down

# randperm returns a random permutation of 0..n_samples-1, i.e. shuffled indices.
# NOTE(review): no seed is set, so the split differs from run to run.
shuffled_indices = torch.randperm(n_samples)

# Indices of the training and validation sets.
# Slice on an explicit count rather than a negative index: with n_val == 0,
# [:-n_val] would be empty and [-n_val:] would select every sample.
train_indices = shuffled_indices[:n_samples - n_val]
val_indices = shuffled_indices[n_samples - n_val:]
train_indices, val_indices

(tensor([9, 1, 0, 4, 6, 8, 5, 7, 2]), tensor([ 3, 10]), 11)

In [58]:
# Gather the input/target pairs of each split with the shuffled index tensors.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Apply the same 0.1 rescaling to the inputs of both splits.
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [59]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train on the training split while reporting the validation loss.

    Every epoch computes the loss on both splits through ``model`` and
    ``loss_fn``; only the training loss is backpropagated and used for the
    optimizer step. Both losses are printed for the first three epochs and
    then every 500 epochs.

    Args:
        n_epochs: Number of update steps to perform.
        optimizer: Optimizer object exposing ``zero_grad()`` and ``step()``.
        params: Tensor unpacked into the model arguments after the inputs.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.

    Note the argument order: both input tensors come first, then both targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)
        val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        # Only the training loss drives the update; val_loss is just reported.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or not epoch % 500
        if should_report:
            print(f"Epoch: {epoch}, Training Loss: {train_loss.item():.4f}, Val loss: {val_loss.item():.4f}")

    return params

In [60]:
# Train with SGD on the rescaled splits. The positional arguments follow the
# loop's (train inputs, val inputs, train targets, val targets) order.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], learning_rate)
training_loop(3000, optimizer, params, train_t_un, val_t_un, train_t_c, val_t_c)

Epoch: 1, Training Loss: 54.4696, Val loss: 196.8908
Epoch: 2, Training Loss: 29.6949, Val loss: 114.1472
Epoch: 3, Training Loss: 24.3173, Val loss: 86.2842
Epoch: 500, Training Loss: 6.9291, Val loss: 25.0236
Epoch: 1000, Training Loss: 3.4149, Val loss: 12.7145
Epoch: 1500, Training Loss: 2.6488, Val loss: 8.6820
Epoch: 2000, Training Loss: 2.4817, Val loss: 7.1729
Epoch: 2500, Training Loss: 2.4453, Val loss: 6.5498
Epoch: 3000, Training Loss: 2.4374, Val loss: 6.2767


tensor([  5.0879, -15.7776], requires_grad=True)

## 5.5.4 自动求导更新及关闭

In [62]:
def training_loop(n_epochs, optimizer, params, train_t_u, train_t_c, val_t_u, val_t_c):
    """Train on the training split, evaluating the validation split without autograd.

    Every epoch computes the training loss through ``model`` and ``loss_fn``,
    computes the validation loss inside ``torch.no_grad()``, and then performs
    one optimizer step driven by the training loss only.

    Args:
        n_epochs: Number of update steps to perform.
        optimizer: Optimizer object exposing ``zero_grad()`` and ``step()``.
        params: Tensor unpacked into the model arguments after the inputs.
        train_t_u: Training inputs.
        train_t_c: Training targets.
        val_t_u: Validation inputs.
        val_t_c: Validation targets.

    Note the argument order: (train inputs, train targets, val inputs,
    val targets).

    Returns:
        The ``params`` object that was passed in.

    Raises:
        AssertionError: If the validation loss is unexpectedly tracked by
            autograd.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Validation is evaluation only: under no_grad() autograd records no
        # graph for these operations.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            # Sanity check that gradient tracking really is switched off here.
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
    return params