In [1]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

### Generating the Dataset

In [50]:
class SyntheticRegressionData(d2l.DataModule):  #@save
    """Toy linear-regression dataset: ``y = X w + b + noise``.

    Features are drawn from a standard normal distribution and the labels
    are a noisy linear function of them. The first ``num_train`` rows are
    followed by ``num_val`` further rows in the same tensors.

    Args:
        w: 1-D tensor of ground-truth weights; its length fixes the number
            of feature columns.
        b: Ground-truth bias added to every label.
        noise: Scale applied to the standard-normal label noise.
        num_train: Number of training rows to generate.
        num_val: Number of validation rows to generate.
        batch_size: Minibatch size (not used in the constructor itself).
    """
    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000,
                 batch_size=32):
        super().__init__()
        # Must run before any extra locals are created.
        # NOTE(review): presumably this stores the constructor arguments as
        # attributes (other cells read self.num_train, self.batch_size,
        # self.w, ...) -- confirm against d2l.
        self.save_hyperparameters()
        total_rows = num_train + num_val
        self.X = torch.randn(total_rows, len(w))
        label_noise = noise * torch.randn(total_rows, 1)
        # Reshape w into a column so the product has shape (total_rows, 1).
        self.y = self.X @ w.reshape((-1, 1)) + b + label_noise

In [75]:
# Build the synthetic dataset with true weights [2, -3.4] and bias 4.2;
# every other constructor argument keeps its default value.
data = SyntheticRegressionData(w=torch.tensor([2, -3.4]), b=4.2)
# Short alias for the label tensor; a later cell indexes it directly.
z = data.y

In [59]:
# Show the first example: its feature row and the matching label.
print('features:', data.X[0],'\nlabel:', data.y[0])

features: tensor([ 0.8440, -0.5173]) 
label: tensor([7.6289])


### Reading the Dataset

In [74]:
# Demo of in-place shuffling: start from the ordered indices 0..19,
# randomize their order, then show the result.
tab = [*range(20)]
random.shuffle(tab)
print(tab)

[6, 19, 1, 16, 11, 18, 12, 7, 14, 2, 0, 8, 4, 5, 17, 9, 15, 3, 10, 13]


In [86]:
# Three ways to index the label tensor: a slice (first 10 rows), a single
# integer index, and a tensor of indices (rows 0 through 8).
z[:10], z[6], z[torch.arange(0, 9)]

(tensor([[ 2.3106],
         [-2.1565],
         [ 5.1237],
         [ 2.8306],
         [ 0.6484],
         [ 2.6666],
         [ 1.2450],
         [ 7.7623],
         [ 6.5686],
         [-1.1438]]),
 tensor([1.2450]),
 tensor([[ 2.3106],
         [-2.1565],
         [ 5.1237],
         [ 2.8306],
         [ 0.6484],
         [ 2.6666],
         [ 1.2450],
         [ 7.7623],
         [ 6.5686]]))

#### get_dataloader function

In [127]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield ``(features, labels)`` minibatches from the stored tensors.

    Args:
        train: If true, iterate over rows ``[0, num_train)`` in a freshly
            shuffled order; otherwise iterate over rows
            ``[num_train, num_train + num_val)`` in their stored order.

    Yields:
        Tuples ``(self.X[idx], self.y[idx])`` where ``idx`` holds at most
        ``batch_size`` row indices; the final batch may be shorter.
    """
    first = 0 if train else self.num_train
    last = self.num_train if train else self.num_train + self.num_val
    order = list(range(first, last))
    if train:
        # Training examples are visited in random order.
        random.shuffle(order)
    for offset in range(0, len(order), self.batch_size):
        chosen = torch.tensor(order[offset:offset + self.batch_size])
        yield self.X[chosen], self.y[chosen]

In [129]:
# Pull a single minibatch from the training loader and report its shapes.
# NOTE(review): train_dataloader() is not defined in this notebook;
# presumably it is inherited from d2l.DataModule and calls
# get_dataloader(train=True) -- confirm.
X, y = next(iter(data.train_dataloader()))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


### Concise Implementation of the Data Loader

In [119]:
@d2l.add_to_class(d2l.DataModule)  #@save
def get_tensorloader(self, tensors, train, indices=slice(0, None)):
    """Wrap a selection of rows from ``tensors`` in a PyTorch DataLoader.

    Args:
        tensors: Iterable of tensors; each one is indexed with ``indices``.
        train: Passed as the loader's ``shuffle`` flag.
        indices: Index applied to every tensor; the default slice keeps
            all rows.

    Returns:
        A ``torch.utils.data.DataLoader`` over the selected rows that
        batches by ``self.batch_size``.
    """
    selected = [t[indices] for t in tensors]
    return torch.utils.data.DataLoader(
        dataset=torch.utils.data.TensorDataset(*selected),
        batch_size=self.batch_size,
        shuffle=train,
    )

@d2l.add_to_class(SyntheticRegressionData)  #@save
def get_dataloader(self, train):
    """Return a tensor-backed loader for the training or validation rows.

    Args:
        train: If true, select rows ``[0, num_train)``; otherwise select
            every row from ``num_train`` onward. The flag is also forwarded
            to ``get_tensorloader``.

    Returns:
        Whatever ``self.get_tensorloader`` returns for ``(self.X, self.y)``
        restricted to the selected rows.
    """
    if train:
        rows = slice(0, self.num_train)
    else:
        rows = slice(self.num_train, None)
    return self.get_tensorloader((self.X, self.y), train, rows)

In [118]:
# Pull a single minibatch from the training loader and report its shapes.
# NOTE(review): the recorded output for this cell starts with an extra
# `torch.Size([1000, 1])` line that nothing in this cell prints -- it looks
# stale from an earlier run; re-execute the cell to refresh the output.
X, y = next(iter(data.train_dataloader()))
print('X shape:', X.shape, '\ny shape:', y.shape)

torch.Size([1000, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


In [120]:
# Number of minibatches in the training loader.
# NOTE(review): len() only works when train_dataloader() returns a sized
# object such as a torch DataLoader; a generator-based get_dataloader
# returns a generator, which has no len() -- this cell therefore depends on
# which get_dataloader definition was executed last.
len(data.train_dataloader())

32

### Exercises

In [122]:
# Draw 32 standard-normal samples as a 1-D tensor and show them.
x = torch.randn((32,))
print(x)

tensor([-0.2945, -0.6899, -0.3805,  0.9238, -0.4976, -0.3439,  0.1189,  0.7816,
        -0.2215, -1.0487,  0.8337,  0.5769, -0.8685, -2.9457, -1.8158,  0.0043,
         0.1324, -0.2590, -0.6952,  1.4374,  0.5996,  0.3645, -0.1164, -0.5946,
         0.0199,  0.4480,  0.4267, -2.3466,  2.0645,  0.9726, -0.9612, -0.6085])


In [130]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield minibatches of synthetic data generated on the fly.

    Instead of indexing the stored ``self.X`` / ``self.y`` tensors, every
    batch is freshly sampled from the same model used by the constructor:
    ``y = X w + b + noise``.

    Args:
        train: If true, produce ``num_train`` examples in total; otherwise
            produce ``num_val`` examples.

    Yields:
        Tuples ``(X, y)`` where ``X`` has shape ``(m, len(w))`` and ``y``
        has shape ``(m, 1)``. ``m`` equals ``batch_size`` except possibly
        for the final batch, which holds only the remaining examples.
    """
    # No index list is needed: the samples are i.i.d. draws, so there is
    # nothing to shuffle and only the example count matters.
    num_examples = self.num_train if train else self.num_val
    w_column = self.w.reshape((-1, 1))  # loop-invariant, hoisted
    for start in range(0, num_examples, self.batch_size):
        # Shrink the last batch so exactly num_examples rows are produced.
        m = min(self.batch_size, num_examples - start)
        X = torch.randn(m, len(self.w))
        noise = torch.randn(m, 1) * self.noise
        # Labels must come from this batch's X, not the stored self.X:
        # self.X has num_train + num_val rows, which neither matches the
        # yielded features nor broadcasts against the (m, 1) noise.
        y = torch.matmul(X, w_column) + self.b + noise
        yield X, y