In [7]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

In [8]:
def synthetic_data(w, b, num_examples):
#     生成y=Xw+b+epsilon
#      X.size = tensor(样本数，特征长度)
    X = torch.normal(0, 1, (num_examples, len(w))) 
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

In [32]:
param_w = torch.tensor([2, -3.4])
param_b = 4.2
features, labels = synthetic_data(param_w, param_b, 1000)
print(f'特征格式为', features.shape, f'标签格式为', labels.shape)

特征格式为 torch.Size([1000, 2]) 标签格式为 torch.Size([1000, 1])


In [10]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: min(i+batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]

In [11]:
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)

tensor([[ 0.3098,  0.7439],
        [-1.0937,  0.6773],
        [-0.0673,  0.7005],
        [-1.4594, -1.4335],
        [ 0.0730, -0.2306],
        [ 0.4899, -0.5550],
        [-0.9359, -0.5972],
        [-0.0518,  0.1257],
        [-2.0589,  2.1569],
        [ 0.6676,  0.7456]]) 
 tensor([[ 2.2809],
        [-0.2944],
        [ 1.6996],
        [ 6.1635],
        [ 5.1168],
        [ 7.0856],
        [ 4.3573],
        [ 3.6788],
        [-7.2407],
        [ 3.0026]])
tensor([[ 1.3475e+00,  3.2836e-01],
        [ 5.0576e-02,  1.2218e-03],
        [ 1.1146e-01, -3.3590e-01],
        [-1.0422e+00, -1.3302e-02],
        [-1.0665e+00, -5.3489e-01],
        [-6.4196e-01,  3.5907e-01],
        [-3.6859e-01, -1.4108e-01],
        [ 1.9696e-01,  6.2655e-01],
        [ 4.5896e-01, -9.9708e-01],
        [ 9.8349e-01, -1.5246e+00]]) 
 tensor([[ 5.7786],
        [ 4.2961],
        [ 5.5632],
        [ 2.1617],
        [ 3.8904],
        [ 1.6933],
        [ 3.9521],
        [ 2.4600],
        [ 8

In [26]:
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
w, b

(tensor([[0.0048],
         [0.0074]], requires_grad=True),
 tensor([0.], requires_grad=True))

In [14]:
def linear_regression(X, w, b):
    return torch.matmul(X, w) + b

In [15]:
def squared_loss(y_pre, y):
    return (y_pre - y.reshape(y_pre.shape))**2 / 2

In [16]:
def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size #梯度下降
            param.grad.zero_()  #梯度清零

In [20]:
lr = 0.03 #学习率
num_epochs = 3 #训练批次
net = linear_regression #使用的模型
loss = squared_loss #损失函数
optimize = sgd #优化函数

In [27]:
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels): #使用所有的训练数据进行一轮训练
        l = loss(net(X, w, b), y)
        l.sum().backward() #使用总的损失函数用来计算梯度
        optimize([w,b], lr, batch_size)
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch:', epoch+1,f'   loss:', train_l.mean())
w,b

epoch: 1    loss: tensor(0.0471)
epoch: 2    loss: tensor(0.0002)
epoch: 3    loss: tensor(5.1681e-05)


(tensor([[ 1.9999],
         [-3.3992]], requires_grad=True),
 tensor([4.1995], requires_grad=True))

In [34]:
# 下面为调用API实现
%matplotlib inline
import random
import torch
import numpy as np
from torch.utils import data
from d2l import torch as d2l
from torch import nn

In [30]:
def load_data(in_data, batch_size, is_train=True):
    dataset = data.TensorDataset(*in_data)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

In [38]:
batch_size = 10
num_epochs = 3
data_iter = load_data((features, labels), batch_size)
net = nn.Sequential(nn.Linear(2,1))

In [39]:
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)

tensor([0.])

In [41]:
loss = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.03)

In [45]:
for epoch in range(num_epochs):
    for X,y in data_iter:
        l = loss(net(X), y)
        optimizer.zero_grad()#梯度清零
        l.backward()#计算梯度
        optimizer.step() #权重更新
    train_loss = loss(net(features), labels)
    print(f'epoch:{epoch+1}, loss:{train_loss}')
print(f'w = {net[0].weight.data}, b={net[0].bias.data}')

epoch:1, loss:9.899074939312413e-05
epoch:2, loss:9.908006904879585e-05
epoch:3, loss:9.973049600375816e-05
w = tensor([[ 1.9990, -3.3999]]), b=tensor([4.2000])
