In [1]:
import torch
from torch import nn
from torch.utils import data

In [2]:
# Synthetic regression task: y = 0.05 + sum_i(0.01 * x_i) + noise, where the
# noise is drawn from a normal distribution with standard deviation 0.01.
# Seed the global RNG first so that data generation, parameter initialization
# and minibatch shuffling give the same numbers on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)
# Far fewer training examples (20) than input features (200).
n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05


def synthetic_data(w, b, num_examples):
    """Sample a noisy linear dataset y = Xw + b + noise.

    Features are drawn from a normal distribution with mean 0 and std 1;
    the additive label noise is drawn from a normal distribution with
    mean 0 and std 0.01.

    Args:
        w: Weight tensor; its length sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to generate.

    Returns:
        Tuple (X, y) where X has shape (num_examples, len(w)) and y is
        reshaped into a single column.
    """
    num_features = len(w)
    features = torch.normal(0, 1, (num_examples, num_features))
    clean_labels = features @ w + b
    noise = torch.normal(0, 0.01, clean_labels.shape)
    labels = clean_labels + noise
    return features, labels.reshape(-1, 1)


def load_array(data_arrays, batch_size, is_train=True):
    """Build a minibatch iterator over a group of tensors.

    Args:
        data_arrays: Iterable of tensors, unpacked into a TensorDataset.
        batch_size: Number of examples per minibatch.
        is_train: When True the loader shuffles the examples.

    Returns:
        A DataLoader over the wrapped tensors.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader


# Training split: generated first, served in shuffled minibatches.
train_data = synthetic_data(w=true_w, b=true_b, num_examples=n_train)
train_iter = load_array(train_data, batch_size=batch_size)
# Test split: generated second, served in a fixed order.
test_data = synthetic_data(w=true_w, b=true_b, num_examples=n_test)
test_iter = load_array(test_data, batch_size=batch_size, is_train=False)

In [3]:
# Initialize the model parameters
def init_params():
    """Create fresh trainable parameters for the linear model.

    The weight column has `num_inputs` rows (read from the module-level
    constant) and is sampled from a normal distribution with mean 0 and
    std 1; the bias starts at zero. Both tensors track gradients.

    Returns:
        The list [w, b].
    """
    weights = torch.normal(0, 1, size=(num_inputs, 1)).requires_grad_()
    bias = torch.zeros(1).requires_grad_()
    return [weights, bias]

In [4]:
# L2 penalty: half of the squared L2 norm
def l2_penalty(w):
    """Return sum(w ** 2) / 2 as a scalar tensor."""
    squared = w ** 2
    return squared.sum() / 2

In [5]:
def sgd(params, lr, batch_size):
    """Apply one minibatch SGD step in place and reset the gradients.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``,
    then its gradient buffer is zeroed so the next backward pass starts clean.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to the accumulated gradient.

    Parameters whose ``.grad`` is None are left untouched instead of
    raising a TypeError.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been accumulated for this parameter.
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

def squared_loss(y_hat, y):
    """Per-example squared error, halved.

    `y` is reshaped to the shape of `y_hat` before subtracting, so the
    result has the same shape as `y_hat`; no reduction is applied.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual.pow(2) / 2

def linreg(X, w, b):
    """Linear model: matrix product of X and w, plus the bias b."""
    weighted_sum = X @ w
    return weighted_sum + b

In [9]:
# Training
def train(lambd, num_epochs=100, lr=0.003):
    """Train the linear model from scratch with an L2 penalty.

    Parameters are freshly created by `init_params()` and fitted on the
    module-level `train_iter` with minibatch SGD. The L2 norm of the
    learned weights is printed once training finishes.

    Args:
        lambd: Strength of the L2 penalty; 0 disables the penalty.
        num_epochs: Number of passes over `train_iter` (default 100).
        lr: Learning rate passed to `sgd` (default 0.003).

    Returns:
        The list [w, b] holding the trained weight and bias tensors.
    """
    w, b = init_params()
    for _ in range(num_epochs):
        for X, y in train_iter:
            # The scalar penalty is broadcast onto every per-example loss,
            # so after sum() it is counted once per example in the batch.
            l = squared_loss(linreg(X, w, b), y) + lambd * l2_penalty(w)
            l.sum().backward()
            # Normalize by the size of this minibatch rather than the
            # configured batch size, so a shorter final batch is still
            # averaged correctly.
            sgd([w, b], lr, X.shape[0])
    print('w的L2范数是:{}'.format(torch.norm(w).item()))
    return [w, b]

In [21]:
# lambd = 0: train without any penalty term
w, b = train(lambd=0)
train_features, train_labels = train_iter.dataset.tensors
test_features, test_labels = test_iter.dataset.tensors
# Squared loss summed over the training set
train_loss = squared_loss(linreg(train_features, w, b), train_labels).sum()
print("训练集loss：", train_loss)
# Squared loss summed over the test set
test_loss = squared_loss(linreg(test_features, w, b), test_labels).sum()
print("测试集loss: ", test_loss)

w的L2范数是:12.796683311462402
训练集loss： tensor(4.5104e-06, grad_fn=<SumBackward0>)
测试集loss:  tensor(8744.3457, grad_fn=<SumBackward0>)


In [22]:
# lambd = 3: train with the L2 penalty switched on
w, b = train(lambd=3)
train_features, train_labels = train_iter.dataset.tensors
test_features, test_labels = test_iter.dataset.tensors
# Squared loss summed over the training set
train_loss = squared_loss(linreg(train_features, w, b), train_labels).sum()
print("训练集loss：", train_loss)
# Squared loss summed over the test set
test_loss = squared_loss(linreg(test_features, w, b), test_labels).sum()
print("测试集loss: ", test_loss)

w的L2范数是:0.37384530901908875
训练集loss： tensor(0.0161, grad_fn=<SumBackward0>)
测试集loss:  tensor(8.5794, grad_fn=<SumBackward0>)
