In [1]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [2]:
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

In [3]:
def load_array(data_arrays, batch_size, is_train = True):   #@save
    """构造一个PyTorch数据迭代器"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

In [4]:
batch_size = 10
data_iter = load_array((features, labels), batch_size)

In [5]:
next(iter(data_iter))

[tensor([[-1.4429,  0.3087],
         [ 0.6850,  0.2491],
         [ 0.9798,  0.1723],
         [ 0.4804, -1.1634],
         [ 0.7851, -2.2130],
         [ 1.2080,  0.0298],
         [-0.6162,  2.0179],
         [-1.2108,  0.6057],
         [ 0.8505, -0.0848],
         [ 0.4974,  0.2549]]),
 tensor([[ 0.2496],
         [ 4.7297],
         [ 5.5735],
         [ 9.1153],
         [13.2736],
         [ 6.5073],
         [-3.8909],
         [-0.2799],
         [ 6.1870],
         [ 4.3324]])]

In [6]:
from torch import nn

In [7]:
net = nn.Sequential(nn.Linear(2, 1))

In [19]:
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
net[0].weight, net[0].bias

(Parameter containing:
 tensor([[-0.0144, -0.0100]], requires_grad=True),
 Parameter containing:
 tensor([0.], requires_grad=True))

In [20]:
loss = nn.MSELoss()

In [22]:
trainer = torch.optim.SGD(net.parameters(), lr = 0.03)

In [25]:
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(X), y)
    print(f'epoch:{epoch+1}, loss:{l:f}')

epoch:1, loss:0.000454
epoch:2, loss:0.000094
epoch:3, loss:0.000032


In [28]:
w = net[0].weight.data
print(f'w的误差:{true_w - w.reshape(true_w.shape)}')
b = net[0].bias.data
print(f'b的误差:{true_b - b}')

w的误差:tensor([ 2.6691e-04, -3.6001e-05])
b的误差:tensor([0.0009])


In [33]:
net[0].weight.grad

tensor([[-1.8675e-04, -1.3266e-05]])

In [7]:
net1 = nn.Sequential(nn.Linear(2, 1))
net2 = nn.Sequential(nn.Linear(2, 1))
net1[0].weight.data.normal_(0, 0.01)
net1[0].bias.data.fill_(0)
net2[0].weight.data.normal_(0, 0.01)
net2[0].bias.data.fill_(0)

tensor([0.])

In [8]:
loss1 = nn.MSELoss(reduction = 'mean') #默认是mean
loss2 = nn.MSELoss(reduction = 'sum')
trainer1 = torch.optim.SGD(net1.parameters(), lr = 0.03)
trainer2 = torch.optim.SGD(net2.parameters(), lr = 0.003)

In [9]:
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l1 = loss1(net1(X), y)
        l2 = loss2(net2(X), y)
        trainer1.zero_grad()
        trainer2.zero_grad()
        l1.backward()
        l2.backward()
        trainer1.step()
        trainer2.step()
        trainer1.step()
        trainer2.step()
    print(f'epoch:{epoch+1}\tloss1:{l1:f}\tloss2:{l2:f}')
    print('\tnet1_w.grad:', net1[0].weight.grad, '\tnet1_b.grad:', net1[0].bias.grad)
    print('\tnet2_w.grad:', net2[0].weight.grad, '\tnet2_b.grad:', net2[0].bias.grad)

epoch:1	loss1:0.000141	loss2:0.001412
	net1_w.grad: tensor([[-0.0079, -0.0131]]) 	net1_b.grad: tensor([0.0074])
	net2_w.grad: tensor([[-0.0787, -0.1307]]) 	net2_b.grad: tensor([0.0744])
epoch:2	loss1:0.000138	loss2:0.001383
	net1_w.grad: tensor([[-0.0042, -0.0062]]) 	net1_b.grad: tensor([-0.0098])
	net2_w.grad: tensor([[-0.0425, -0.0622]]) 	net2_b.grad: tensor([-0.0984])
epoch:3	loss1:0.000125	loss2:0.001249
	net1_w.grad: tensor([[-0.0066, -0.0040]]) 	net1_b.grad: tensor([0.0065])
	net2_w.grad: tensor([[-0.0659, -0.0395]]) 	net2_b.grad: tensor([0.0648])


In [11]:
print(f'net1的w的误差{true_w - net1[0].weight.data.reshape(true_w.shape)}')
print(f'net2的w的误差{true_w - net2[0].weight.data.reshape(true_w.shape)}')
print(f'net1的b的误差{true_b - net1[0].bias.data}')
print(f'net1的b的误差{true_b - net1[0].bias.data}')

net1的w的误差tensor([-0.0012,  0.0019])
net2的w的误差tensor([-0.0012,  0.0019])
net1的b的误差tensor([0.0004])
net1的b的误差tensor([0.0004])
