TensorDataset 可以用来对多个 tensor 进行打包，类似于 Python 中的 zip。该类沿每个 tensor 的第一个维度进行索引，因此传入的所有 tensor 在第一个维度上的大小必须相同。

TensorDataset 中的参数必须是 tensor

In [7]:
from torch.utils.data import TensorDataset
import torch
from torch.utils.data import DataLoader

# Twelve samples with three features each: the same three rows repeated four times.
a = torch.tensor([[11, 22, 33], [44, 55, 66], [77, 88, 99]] * 4)
# One class label per sample, cycling through 0, 1, 2.
b = torch.tensor([0, 1, 2] * 4)
# TensorDataset pairs a[i] with b[i]; both tensors share the same first-dimension size.
train_ids = TensorDataset(a, b)
# Slicing the dataset returns a tuple holding the matching slice of every tensor.
print(train_ids[:3])
print('#' * 30)


(tensor([[11, 22, 33],
        [44, 55, 66],
        [77, 88, 99]]), tensor([0, 1, 2]))
##############################


In [9]:

# Iterate over the dataset: every item is a (sample, label) tuple.
for sample in train_ids:
    x_train, y_label = sample
    print(x_train, y_label)
# Separator line printed after the per-sample listing.
print('#' * 30)


tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
##############################


In [13]:
# Draw mini-batches of batch_size samples; shuffle=True requests a reshuffled order.
train_loader = DataLoader(dataset=train_ids, batch_size=4, shuffle=True)
# enumerate(..., 1) yields (batch number starting at 1, batch). Each batch is an
# (inputs, labels) pair and is unpacked right in the loop header, so no variable
# named `data` is created: that name is the `torch.utils.data` module alias that
# load_data() relies on, and rebinding it here would break load_data() whenever
# this cell is re-run after the import.
for i, (x_data, label) in enumerate(train_loader, 1):
    print(' batch:{0}\n x_data:\n{1}  label: \n{2}'.format(i, x_data, label))

 batch:1
 x_data:
tensor([[44, 55, 66],
        [44, 55, 66],
        [77, 88, 99],
        [11, 22, 33]])  label: 
tensor([1, 1, 2, 0])
 batch:2
 x_data:
tensor([[11, 22, 33],
        [44, 55, 66],
        [77, 88, 99],
        [77, 88, 99]])  label: 
tensor([0, 1, 2, 2])
 batch:3
 x_data:
tensor([[11, 22, 33],
        [44, 55, 66],
        [11, 22, 33],
        [77, 88, 99]])  label: 
tensor([0, 1, 0, 2])


In [19]:
%matplotlib inline
import random  # 随机梯度下降和随机初始化权重
import torch
from torch.utils import data
from d2l import  torch as d2l


# Build the synthetic dataset.
def generate_data(w, b, examples_num):
    """Sample a noisy linear dataset ``Y = X @ w + b + noise``.

    Args:
        w: 1-D weight tensor; its length sets the number of features.
        b: Scalar bias added to every example.
        examples_num: Number of examples (rows) to generate.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(examples_num, len(w))`` and
        ``Y`` is a column of shape ``(examples_num, 1)``.
    """
    # Features are drawn from a standard normal distribution.
    feature_shape = (examples_num, len(w))
    X = torch.normal(0, 1, feature_shape)
    # matmul of a 2-D tensor with a 1-D tensor gives one value per row.
    clean_targets = torch.matmul(X, w) + b
    # Gaussian noise with standard deviation 0.01 perturbs every target.
    noise = torch.normal(0, 0.01, clean_targets.shape)
    Y = clean_targets + noise
    # Return the targets as a column vector.
    return X, Y.reshape((-1, 1))

# Ground-truth parameters the regression is expected to recover.
true_w = torch.tensor([1.5, 2.8])
true_b = 3.33
# Draw 1000 synthetic (feature, label) examples from the linear model.
features, labels = generate_data(w=true_w, b=true_b, examples_num=1000)

In [20]:
def load_data(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a PyTorch mini-batch iterator.

    Args:
        data_arrays: Iterable of tensors, unpacked as the arguments of
            ``TensorDataset``.
        batch_size: Number of examples per mini-batch.
        is_train: Passed to the loader as ``shuffle``. Defaults to True.

    Returns:
        A ``DataLoader`` over the combined dataset.
    """
    dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=is_train)
    return loader

# Mini-batch size used for training.
batch_size = 10
data_iter = load_data(data_arrays=(features, labels), batch_size=batch_size)

# Pull a single batch to inspect what the iterator yields.
next(iter(data_iter))

[tensor([[-1.6787,  0.2679],
         [ 1.4045,  0.1209],
         [ 0.6498,  1.5793],
         [-0.0988,  2.0127],
         [-0.8026,  0.8092],
         [-1.8287,  2.2483],
         [ 2.2411, -0.8750],
         [ 0.5960,  0.8445],
         [-0.9508, -0.0489],
         [ 0.9945,  1.6306]]),
 tensor([[1.5592],
         [5.7780],
         [8.7178],
         [8.8166],
         [4.3966],
         [6.9010],
         [4.2429],
         [6.5917],
         [1.7719],
         [9.3749]])]

In [21]:
# Define the model; "nn" is short for neural network.
from torch import nn

# nn.Sequential is an ordered container of layers; here it holds a single linear
# layer with 2 input features and 1 output.
net = nn.Sequential(nn.Linear(2, 1))

# net[0] is the first (and only) layer of the container.
# Initialise its parameters through torch.nn.init instead of mutating `.data`:
# the weight is filled in place from a normal distribution with mean 0 and
# standard deviation 0.01, and the bias is set to zero.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)
nn.init.zeros_(net[0].bias)

# Mean squared error loss.
loss = nn.MSELoss()
# net.parameters() covers both the weight and the bias; optimise with SGD, lr 0.03.
trainer = torch.optim.SGD(net.parameters(), lr=0.03)

In [23]:
epochs_num = 3
for epoch in range(epochs_num):
    for X, y in data_iter:
        l = loss(net(X), y)
        # Clear the gradients left over from the previous step.
        trainer.zero_grad()
        # nn.MSELoss with its default reduction already returns a scalar
        # (the mean over the batch), so backward() can be called directly.
        l.backward()
        # Apply one parameter update.
        trainer.step()
    # Evaluate on the full dataset; no_grad() skips building an autograd graph
    # that would never be used.
    with torch.no_grad():
        l = loss(net(features), labels)
    # {l:f} prints the scalar loss in fixed-point float format.
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 0.000098
epoch 2, loss 0.000097
epoch 3, loss 0.000097
