In [1]:
import math
import numpy as np
import torch
from torch import nn
from d2l import torch as d2l

## 生成数据

首先随机生成 $n$ 维向量，再计算对应的 $k$ 次方，组成一个个单项式，最后加上噪声扰动。

In [15]:
max_degree = 50  # 多项式的最大阶数
n_train, n_test = 100, 100  # 训练和测试数据集大小
true_w = np.zeros(max_degree)
true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])

features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)
poly_features = np.power(features, np.arange(max_degree).reshape(1, -1)) # 计算 k 次幂
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)  # Gamma 函数
# labels的维度:(n_train+n_test,)
labels = np.dot(poly_features, true_w) # k 次幂加权求和，形成多项式数据
labels += np.random.normal(scale=0.1, size=labels.shape)

# 转换为 tensor
true_w, features, poly_features, labels = [torch.tensor(x, dtype=
    torch.float32) for x in [true_w, features, poly_features, labels]]

In [4]:
np.power(features, np.arange(max_degree).reshape(1, -1)).shape # np.power 有广播机制

(200, 20)

In [17]:
features[:2], poly_features[:2, :], labels[:2]


(tensor([[ 1.0818],
         [-1.6986]]),
 tensor([[ 1.0000e+00,  1.0818e+00,  5.8510e-01,  2.1098e-01,  5.7056e-02,
           1.2344e-02,  2.2256e-03,  3.4393e-04,  4.6506e-05,  5.5898e-06,
           6.0468e-07,  5.9465e-08,  5.3606e-09,  4.4606e-10,  3.4467e-11,
           2.4856e-12,  1.6805e-13,  1.0694e-14,  6.4266e-16,  3.6590e-17,
           1.9790e-18,  1.0195e-19,  5.0127e-21,  2.3576e-22,  1.0627e-23,
           4.5981e-25,  1.9131e-26,  7.6648e-28,  2.9612e-29,  1.1046e-30,
           3.9830e-32,  1.3899e-33,  4.6985e-35,  1.5402e-36,  4.9003e-38,
           1.5146e-39,  4.5510e-41,  1.3312e-42,  3.7835e-44,  1.4013e-45,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 1.0000e+00, -1.6986e+00,  1.4426e+00, -8.1676e-01,  3.4683e-01,
          -1.1782e-01,  3.3355e-02, -8.0937e-03,  1.7185e-03, -3.2433e-04,
           5.5089e-05, -8.5066e-06,  1.2041e-06, -1.5732e

In [6]:
def evaluate_loss(net, data_iter, loss):  #@save
    """
    评估给定数据集上模型的损失
    """
    metric = d2l.Accumulator(2)  # 损失的总和, 样本数量
    for X, y in data_iter:
        out = net(X)
        y = y.reshape(out.shape)
        l = loss(out, y)
        metric.add(l.sum(), l.numel())
    return metric[0] / metric[1]


In [9]:
def train(train_features, test_features, train_labels, test_labels,
          num_epochs=400):
    loss = nn.MSELoss(reduction='none')
    input_shape = train_features.shape[-1]
    net = nn.Sequential(nn.Linear(input_shape, 1, bias=False)) # 关闭偏置
    batch_size = min(10, train_labels.shape[0])
    train_iter = d2l.load_array((train_features, train_labels.reshape(-1, 1)),
                                batch_size)
    test_iter = d2l.load_array((test_features, test_labels.reshape(-1, 1)),
                               batch_size, is_train=False)
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    for epoch in range(num_epochs):
        d2l.train_epoch_ch3(net, train_iter, loss, trainer) # 再一个 epoch 内前向计算、梯度下降
        if epoch == 0 or (epoch + 1) % 20 == 0:
            print("Epoch: {}, train loss: {}, test loss: {}.".format(epoch + 1,
                evaluate_loss(net, train_iter, loss), evaluate_loss(net, test_iter, loss)
            ))
    print('weight:', net[0].weight.data.numpy())


In [10]:
# 从多项式特征中选择前4个维度，即1,x,x^2/2!,x^3/3!
train(poly_features[:n_train, :4], poly_features[n_train:, :4],
      labels[:n_train], labels[n_train:])

Epoch: 1, train loss: 19.77403266906738, test loss: 18.346735153198242.
Epoch: 20, train loss: 1.6761061668395996, test loss: 1.2469853162765503.
Epoch: 40, train loss: 0.49969078302383424, test loss: 0.37439382433891294.
Epoch: 60, train loss: 0.2079786652326584, test loss: 0.16308529019355775.
Epoch: 80, train loss: 0.09539025455713272, test loss: 0.07523179054260254.
Epoch: 100, train loss: 0.046849064230918884, test loss: 0.036400740444660185.
Epoch: 120, train loss: 0.02552562564611435, test loss: 0.019766246303915977.
Epoch: 140, train loss: 0.01609700758010149, test loss: 0.012832871563732624.
Epoch: 160, train loss: 0.011943618580698968, test loss: 0.010116934552788734.
Epoch: 180, train loss: 0.010106721669435501, test loss: 0.00914506945759058.
Epoch: 200, train loss: 0.009296395666897297, test loss: 0.008862802758812904.
Epoch: 220, train loss: 0.00893897108733654, test loss: 0.008836183995008469.
Epoch: 240, train loss: 0.008780805096030235, test loss: 0.008895322494208813.

In [12]:
train(poly_features[:n_train, :2], poly_features[n_train:, :2],
      labels[:n_train], labels[n_train:])

Epoch: 1, train loss: 29.459381561279297, test loss: 28.877881469726564.
Epoch: 20, train loss: 7.788546772003174, test loss: 5.675079479217529.
Epoch: 40, train loss: 7.779412059783936, test loss: 5.682549972534179.
Epoch: 60, train loss: 7.779470901489258, test loss: 5.68020004272461.
Epoch: 80, train loss: 7.77943229675293, test loss: 5.684016017913819.
Epoch: 100, train loss: 7.779427242279053, test loss: 5.681326274871826.
Epoch: 120, train loss: 7.7794511604309085, test loss: 5.68093017578125.
Epoch: 140, train loss: 7.779554252624512, test loss: 5.684788932800293.
Epoch: 160, train loss: 7.779422969818115, test loss: 5.682471256256104.
Epoch: 180, train loss: 7.779705829620362, test loss: 5.688370685577393.
Epoch: 200, train loss: 7.779420375823975, test loss: 5.6821357536315915.
Epoch: 220, train loss: 7.779491806030274, test loss: 5.685519466400146.
Epoch: 240, train loss: 7.779924297332764, test loss: 5.689864597320557.
Epoch: 260, train loss: 7.7796315002441405, test loss: 5

In [18]:
train(poly_features[:n_train, :], poly_features[n_train:, :],
      labels[:n_train], labels[n_train:])

Epoch: 1, train loss: 29.09508987426758, test loss: 28.300223236083983.
Epoch: 20, train loss: 0.7967560148239136, test loss: 0.6516839361190796.
Epoch: 40, train loss: 0.19374471962451933, test loss: 0.18059107422828674.
Epoch: 60, train loss: 0.10744012862443925, test loss: 0.13059627294540405.
Epoch: 80, train loss: 0.08030373707413674, test loss: 0.11123428136110305.
Epoch: 100, train loss: 0.06490175008773803, test loss: 0.09500932574272156.
Epoch: 120, train loss: 0.05407710567116737, test loss: 0.08019326344132423.
Epoch: 140, train loss: 0.04612123489379883, test loss: 0.06914456516504287.
Epoch: 160, train loss: 0.04019991457462311, test loss: 0.061137375831604.
Epoch: 180, train loss: 0.03577265664935112, test loss: 0.0544330657273531.
Epoch: 200, train loss: 0.03244529105722904, test loss: 0.050169285759329794.
Epoch: 220, train loss: 0.029909060075879096, test loss: 0.04671935521066189.
Epoch: 240, train loss: 0.027955930829048157, test loss: 0.043638511300086974.
Epoch: 26