In [1]:
import torch

In [3]:
# Seed the global RNG before sampling so the toy features are identical on
# every Restart & Run All (torch.rand draws from the global generator).
torch.manual_seed(123)

# Toy data: 5 training and 2 test examples, each with 2 random features in [0, 1).
X_train, X_test = torch.rand(5, 2), torch.rand(2, 2)
# Binary class labels (0 or 1), one per example.
y_train, y_test = torch.tensor([0, 0, 0, 1, 1]), torch.tensor([0, 1])

In [4]:
# NOTE: PyTorch requires class labels to start at 0, and the largest class
# label must not exceed the number of output-layer nodes minus 1.

from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature row with its class label.

    Args:
        X: Indexable collection of features; item ``i`` is the i-th example.
        y: Labels aligned with ``X``. Must expose ``.shape``, since its first
            dimension is used as the dataset length.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of examples.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned inputs. Without this check, surplus feature
        # rows would be silently ignored (length comes from the labels) and
        # surplus labels would raise an IndexError only during iteration.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of examples, "
                f"got {len(X)} and {len(y)}"
            )
        # Stored as attributes so the other methods can access them.
        self.features = X
        self.labels = y

    def __getitem__(self, index):
        """Return exactly one ``(features, label)`` pair for ``index``."""
        return self.features[index], self.labels[index]

    def __len__(self):
        """Return the length of the dataset (number of labels)."""
        return self.labels.shape[0]

# Wrap each split in the custom ToyDataset; these dataset objects exist so
# that a PyTorch DataLoader can be instantiated from them.
train_ds, test_ds = ToyDataset(X_train, y_train), ToyDataset(X_test, y_test)

In [5]:
from torch.utils.data import DataLoader

# Fix the global RNG so the shuffled training order is reproducible.
torch.manual_seed(123)

# Training batches: 2 examples each, reshuffled every epoch. The last,
# smaller batch is kept, and loading happens in the main process.
train_loader = DataLoader(
    train_ds, batch_size=2, shuffle=True, num_workers=0, drop_last=False
)

# Test batches: same settings, but served in their original order.
test_loader = DataLoader(
    test_ds, batch_size=2, shuffle=False, num_workers=0, drop_last=False
)

In [6]:
# With the training DataLoader in place, walk through it one batch at a time
# and print each batch's features and labels (batches are numbered from 1).
for idx, batch in enumerate(train_loader):
    x, y = batch
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[0.4976, 0.8031],
        [0.9427, 0.3825]]) tensor([1, 0])
Batch 2: tensor([[0.4517, 0.8602],
        [0.6670, 0.1880]]) tensor([0, 0])
Batch 3: tensor([[0.4873, 0.0292]]) tensor([1])
