In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

In [2]:
num_samples = 100
num_features = 20

# Draw the synthetic feature matrix from a locally seeded generator so that
# Restart Kernel -> Run All reproduces exactly the same values. A dedicated
# Generator is used instead of np.random.seed() so the global NumPy RNG state
# is left untouched.
random_seed = 0
rng = np.random.default_rng(random_seed)

# Standard-normal samples, one row per sample and one column per feature.
data_array = rng.standard_normal((num_samples, num_features))
data_array.shape

(100, 20)

In [3]:
# Build a tensor from the NumPy feature matrix; the element type is taken
# from the array itself.
data_tensor = torch.tensor(data_array)

# Show the tensor dimensions as the cell result.
data_tensor.size()

torch.Size([100, 20])

In [4]:
# Wrap only the features for now; the wrapped inputs are exposed via `.tensors`.
dataset = TensorDataset(data_tensor)

# Report how many tensors are wrapped and the shape of the first one.
wrapped_tensors = dataset.tensors
print(len(wrapped_tensors), wrapped_tensors[0].shape, sep="\n")

1
torch.Size([100, 20])


In [5]:
# Evenly spaced values from 0 to 3.9, floored so each sample gets a class id
# in {0, 1, 2, 3}. The ids stay floating point (np.floor keeps the dtype).
# NOTE(review): with an upper bound of 3.9 the classes are not equally sized
# (26/25/26/23 for num_samples = 100) -- confirm this is intended.
evenly_spaced = np.linspace(0, 3.9, num_samples)
labels = np.floor(evenly_spaced)

# Add a trailing axis so the labels form a column: (N,) -> (N, 1).
labels_tensor = torch.unsqueeze(torch.tensor(labels), dim=1)

for shape in (labels.shape, labels_tensor.shape):
    print(shape)

(100,)
torch.Size([100, 1])


In [6]:
# Display the whole label column; rows appear in their original order,
# grouped by class id. NOTE(review): this prints one line per sample --
# consider slicing (e.g. labels_tensor[:5]) if the output gets unwieldy.
print(labels_tensor)

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
      

In [7]:
# Check the element types of features and labels (one per line).
print(data_tensor.dtype, labels_tensor.dtype, sep="\n")

torch.float64
torch.float64


In [8]:
# Rebuild the dataset, this time pairing every feature row with its label row.
dataset = TensorDataset(data_tensor, labels_tensor)

# Number of samples, number of wrapped tensors, then each tensor's shape.
print(len(dataset))
print(len(dataset.tensors))
for wrapped in dataset.tensors:
    print(wrapped.shape)

100
2
torch.Size([100, 20])
torch.Size([100, 1])


In [9]:
batch_size = 25

# Serve the dataset in shuffled mini-batches of `batch_size` samples.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# First line: number of batches per pass; second line: number of samples.
num_batches = len(dataloader)
num_dataset_items = len(dataloader.dataset)
print(f"{num_batches}\n{num_dataset_items}")

4
100


The dataloader does not shuffle the data at creation but rather every time you loop through it. Run the cell below to see the shuffling in action.

In [10]:
# One pass over the loader: show the labels of each mini-batch, followed by
# a separator line. The features (X) are unpacked but not displayed.
for X, y in dataloader:
    print(y, "===============", sep="\n")

tensor([[1.],
        [0.],
        [0.],
        [2.],
        [2.],
        [2.],
        [2.],
        [1.],
        [0.],
        [0.],
        [0.],
        [2.],
        [1.],
        [3.],
        [3.],
        [0.],
        [2.],
        [1.],
        [1.],
        [1.],
        [3.],
        [0.],
        [3.],
        [1.],
        [1.]], dtype=torch.float64)
tensor([[1.],
        [3.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [2.],
        [3.],
        [0.],
        [2.],
        [3.],
        [3.],
        [2.],
        [3.],
        [1.],
        [3.],
        [1.],
        [0.],
        [2.],
        [3.],
        [1.],
        [3.],
        [2.],
        [2.]], dtype=torch.float64)
tensor([[2.],
        [2.],
        [0.],
        [0.],
        [1.],
        [2.],
        [0.],
        [0.],
        [3.],
        [2.],
        [2.],
        [0.],
        [3.],
        [1.],
        [2.],
        [0.],
        [0.],
        [1.],
    