In [17]:
import numpy as np
import torch
from torch.utils import data
from tqdm import tqdm

**1. 生成数据集**

In [24]:
# Ground-truth parameters of the linear model y = Xw + b from which the
# synthetic dataset is generated.
true_b = 4.2
true_w = torch.tensor([2.0, -3.4])

def create_data(w, b, num_examples=1000):
    """Generate a synthetic linear-regression dataset ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; its first dimension sets the number of feature
            columns sampled for ``X``.
        b: Scalar bias added to every label.
        num_examples: Number of rows to sample. Defaults to 1000.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, w.shape[0])`` and
        ``y`` is reshaped into a column of shape ``(num_examples, 1)``.
    """
    # Features come from a standard normal. The column count follows ``w`` so
    # that the matmul below is valid for any number of weights.
    X = torch.normal(0, 1, size=(num_examples, w.shape[0]))
    y = torch.matmul(X, w) + b
    # Perturb the labels with small Gaussian noise (std 0.01).
    y += torch.normal(0, 0.01, y.shape)
    # Labels are returned as a column so they line up with the output of a
    # model that emits one value per row.
    return X, y.view(-1, 1)

# Draw the dataset from the ground-truth parameters defined above.
features, labels = create_data(w=true_w, b=true_b)

**2. 加载数据集**

In [25]:
def load_array(data_array, batch_size, is_train=True):
    """Wrap a tuple of tensors in a mini-batch ``DataLoader``.

    Args:
        data_array: Iterable of tensors unpacked into a ``TensorDataset``.
        batch_size: Number of samples per mini-batch.
        is_train: When true, the loader shuffles the samples.

    Returns:
        A ``DataLoader`` over the wrapped dataset.
    """
    tensor_dataset = data.TensorDataset(*data_array)
    loader = data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

# Mini-batch size used for every pass over the data.
batch_size = 10
# Loader over the (features, labels) pairs; is_train is left at its default.
data_iter = load_array(data_array=(features, labels), batch_size=batch_size)

In [26]:
# Pull one mini-batch from a fresh iterator to sanity-check the loader output.
next(iter(data_iter))

[tensor([[-0.2527, -1.4737],
         [ 1.3061, -1.9356],
         [ 0.8122, -0.9066],
         [-0.8038, -0.6368],
         [ 0.9982, -0.9784],
         [ 0.3339, -0.9493],
         [-0.6812,  1.6233],
         [-0.8818, -0.7655],
         [-0.6186,  0.1904],
         [-0.4276, -0.9220]]),
 tensor([[ 8.7173],
         [13.3975],
         [ 8.9057],
         [ 4.7665],
         [ 9.5130],
         [ 8.0890],
         [-2.6858],
         [ 5.0471],
         [ 2.2957],
         [ 6.4898]])]

**3. 定义模型**

In [27]:
from torch import nn

# Linear regression as a one-layer network: 2 input features -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=2, out_features=1),
)

**4. 初始化模型参数**

In [28]:
# Initialise the linear layer: weights drawn from N(0, 0.01^2), bias set to 0.
# The `nn.init` helpers update the parameters in place without recording the
# operation in autograd, so the legacy `.data` accessor is not needed.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)
nn.init.zeros_(net[0].bias)
# Functions whose names end in an underscore generally operate in place.

tensor([0.])

**5. 定义损失函数**

In [29]:
# Mean squared error, averaged over all elements (the default reduction).
loss = nn.MSELoss(reduction='mean')

**6. 定义优化算法**

In [30]:
# Plain mini-batch SGD over all parameters of the network, learning rate 0.03.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

**7. 训练**

In [31]:
num_epochs = 3
for i in tqdm(range(num_epochs)):
    # One pass over the mini-batches produced by the loader.
    for X, y in data_iter:
        l = loss(net(X), y)
        # Reset gradients before backpropagating: PyTorch accumulates them
        # across backward() calls.
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # The end-of-epoch loss on the full dataset is only reported, never
    # backpropagated, so skip building an autograd graph for it.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {i + 1}, loss {l:f}')

100%|██████████| 3/3 [00:00<00:00, 26.51it/s]

epoch 1, loss 0.000177
epoch 2, loss 0.000095
epoch 3, loss 0.000096





* `l.backward()`：沿着计算图反向传播，计算损失对其中各个参数的梯度

* `trainer.zero_grad()`：将此时各个参数的梯度置 0（PyTorch 默认会累加梯度，因此每次反向传播前需要先清零）

In [32]:
# Compare the learned parameters with the ground truth used to build the data.
# `.detach()` gives gradient-free views of the parameters and is the
# recommended replacement for the legacy `.data` accessor.
w = net[0].weight.detach()
b = net[0].bias.detach()
# The weight is stored as a (1, 2) matrix; reshape it to match true_w.
print('w的估计误差:', true_w - w.reshape(true_w.shape))
print('b的估计误差:', true_b - b)

w的估计误差: tensor([-0.0009,  0.0002])
b的估计误差: tensor([-0.0008])
