In [74]:
import torch
import numpy as np

In [75]:
# Paired temperature recordings: t_cel is in degrees Celsius, t_u holds the
# same measurements in an unknown unit.
t_cel = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])
# Copy of the unknown-unit readings scaled down by a factor of ten.
t_un = 0.1 * t_u

# 自动计算梯度

#### 应用自动求导, 使用 grad 属性

设置 requires_grad=True 后, 系统会跟踪由 params 张量经过运算产生的所有张量的系谱树 (计算图). 对其中的某个标量 (例如损失) 调用 backward() 后, 该标量对 params 的导数将自动填充到 params 张量的 grad 属性中.

In [76]:
# use linear model
def model(t_u, w, b):
    """Evaluate the linear model ``w * t_u + b``.

    Args:
        t_u: input value(s).
        w: weight multiplied into the input.
        b: bias added to the scaled input.

    Returns:
        The model's estimate for ``t_u``.
    """
    scaled = w * t_u
    return scaled + b

# use square loss function
def loss_fn(t_cel, t_est):
    """Mean squared error between estimates and targets.

    Args:
        t_cel: target values.
        t_est: estimated values to compare against the targets.

    Returns:
        The mean of the element-wise squared differences.
    """
    residual = t_est - t_cel
    return (residual ** 2).mean()

In [77]:
# Initialize the parameters (unpacked below as w, b) with gradient tracking enabled
params = torch.tensor([1., 0.], requires_grad=True)

# Forward pass on the raw (unscaled) t_u readings, then the scalar loss
t_est = model(t_u, *params)
loss = loss_fn(t_cel, t_est)

loss.backward()     # backward() is called on the quantity being differentiated (the loss)

params.grad     # autograd walks the graph backwards and stores d(loss)/d(params) for requires_grad tensors

tensor([4517.2969,   82.6000])

#### 每次迭代需要归零梯度

注意! 每次反向传播, 系统将把求得的导数**累加**在grad上. 为防止这种情况, 需要在每次迭代时明确地将梯度归零.

In [78]:
# Reset the accumulated gradient in place; when .grad is None there is nothing to reset
if params.grad is not None:
    params.grad.zero_()

#### 总代码

In [79]:
def training_loop(n_epochs, learning_rate, params, t_u, t_cel):
    """Fit the linear model by gradient descent, applying the update by hand.

    Args:
        n_epochs: number of iterations; every iteration uses all samples.
        learning_rate: factor multiplied into the gradient for each update.
        params: tensor unpacked into ``model``'s ``w`` and ``b``. Its ``.grad``
            is read after ``backward()``, so it is expected to have been
            created with ``requires_grad=True``. It is updated in place.
        t_u: model inputs.
        t_cel: target values passed to ``loss_fn``.

    Returns:
        The same ``params`` tensor after the last update.
    """
    for step in range(n_epochs):
        epoch = step + 1

        # backward() adds into .grad, so clear whatever the previous
        # iteration left there (.grad may still be None at this point).
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        current_loss = loss_fn(t_cel, model(t_u, *params))
        current_loss.backward()

        # The parameter update itself must not be tracked by autograd.
        with torch.no_grad():
            update = learning_rate * params.grad
            params -= update

        should_report = epoch == 1 or epoch % 500 == 0
        if should_report:
            print('Epoch %d, Loss %f' % (epoch, float(current_loss)))
    return params

In [83]:
# Run the hand-written gradient-descent loop on the rescaled inputs t_un,
# starting from fresh parameters [1., 0.]; the returned tensor is the cell output.
training_loop(
    n_epochs = 2500,
    learning_rate = 3e-2,
    params = torch.tensor([1., 0.], requires_grad=True),
    t_u = t_un, t_cel = t_cel
)

Epoch 1, Loss 80.364342
Epoch 500, Loss 3.091884
Epoch 1000, Loss 2.928639
Epoch 1500, Loss 2.927653
Epoch 2000, Loss 2.927646
Epoch 2500, Loss 2.927647


tensor([  5.3677, -17.3046], requires_grad=True)

# 使用梯度下降优化器

In [None]:
import torch.optim as optim
# dir(optim)

# Fresh parameters handed to an SGD optimizer, which will apply the updates
params = torch.tensor([1., 0.], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# One forward pass on the rescaled inputs
t_est = model(t_un, *params)
loss = loss_fn(t_cel, t_est)

optimizer.zero_grad()       # the gradients must be zeroed before every backward pass!
loss.backward()

optimizer.step()        # params has now been modified

params

In [92]:
def training_loop(n_epochs, optimizer, params, t_u, t_cel):
    """Fit the linear model, delegating the parameter update to ``optimizer``.

    Args:
        n_epochs: number of iterations; every iteration uses all samples.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        params: tensor unpacked into ``model``'s ``w`` and ``b``.
        t_u: model inputs.
        t_cel: target values passed to ``loss_fn``.

    Returns:
        ``params``, the same object that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1

        current_loss = loss_fn(t_cel, model(t_u, *params))

        # Clear the old gradients, compute new ones, then let the optimizer update.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()

        # Report the first three epochs and then every 500th one.
        if epoch <= 3 or epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(current_loss)))
    return params

# Fresh parameters and an SGD optimizer that holds a reference to them
params = torch.tensor([1., 0.], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)
# Train on the rescaled inputs; the returned params tensor is the cell output
training_loop(
    n_epochs = 2500,
    optimizer = optimizer,
    params = params,
    t_u = t_un, t_cel = t_cel
)

Epoch 1, Loss 80.364342
Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134


tensor([  5.3236, -17.0549], requires_grad=True)

# 训练, 验证, 防止过拟合

#### 分割数据集

In [110]:
# Build the index tensors for the training and validation splits.

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)    # hold out 20% of the samples, rounded down
n_train = n_samples - n_val

# Random permutation of 0..n_samples-1. It is not seeded, so the split (and
# the losses printed by later cells) differ from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice at an explicit non-negative boundary. The negative forms
# [:-n_val] / [-n_val:] invert the split when n_val == 0 (fewer than 5
# samples): [:-0] is empty and [-0:] is everything.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

In [111]:
# Build the training and validation subsets from the index tensors.

# Training split: inputs and targets, then the inputs rescaled by 0.1.
train_t_u, train_t_cel = t_u[train_indices], t_cel[train_indices]
train_t_un = 0.1 * train_t_u

# Validation split, built the same way.
val_t_u, val_t_cel = t_u[val_indices], t_cel[val_indices]
val_t_un = 0.1 * val_t_u

In [113]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_cel, val_t_cel):
    """Fit the linear model on the training split while monitoring validation loss.

    Args:
        n_epochs: number of iterations; every iteration uses the full splits.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        params: tensor unpacked into ``model``'s ``w`` and ``b``.
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_cel: training targets.
        val_t_cel: validation targets.

    Returns:
        ``params``, the same object that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1

        train_loss = loss_fn(train_t_cel, model(train_t_u, *params))

        # The validation loss is only reported, never differentiated, so it
        # is evaluated with gradient tracking switched off.
        with torch.no_grad():
            val_loss = loss_fn(val_t_cel, model(val_t_u, *params))
            # Sanity check: fails if the validation loss is tracked by autograd.
            assert not val_loss.requires_grad

        # Only the training loss drives the parameter update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Report the first three epochs and then every 500th one.
        if epoch <= 3 or epoch % 500 == 0:
            print(f'Epoch {epoch}, Training loss {train_loss.item():.4f}, Validation loss {val_loss.item():.4f}')
    return params

# Fresh parameters and an SGD optimizer that holds a reference to them
params = torch.tensor([1., 0.], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)
# Train on the rescaled training split and report the loss on the rescaled
# validation split; the returned params tensor is the cell output
training_loop(
    n_epochs = 3000,
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, val_t_u = val_t_un, train_t_cel = train_t_cel, val_t_cel = val_t_cel
)

Epoch 1, Training loss 72.7406, Validation loss 114.6712
Epoch 2, Training loss 40.2682, Validation loss 46.4569
Epoch 3, Training loss 33.9582, Validation loss 27.9884
Epoch 500, Training loss 7.0645, Validation loss 6.8510
Epoch 1000, Training loss 3.4611, Validation loss 4.2857
Epoch 1500, Training loss 2.9574, Validation loss 3.5832
Epoch 2000, Training loss 2.8870, Validation loss 3.3564
Epoch 2500, Training loss 2.8772, Validation loss 3.2767
Epoch 3000, Training loss 2.8758, Validation loss 3.2475


tensor([  5.2908, -16.8837], requires_grad=True)