## Loading Dataset in Batches

In [None]:
import torch

In [8]:
# Creating a tensor
a = torch.rand((10,3))
print(a)

tensor([[0.6116, 0.5117, 0.5136],
        [0.2954, 0.8412, 0.4780],
        [0.7043, 0.4535, 0.3383],
        [0.5575, 0.9692, 0.1305],
        [0.2817, 0.1753, 0.0334],
        [0.1222, 0.3172, 0.3533],
        [0.4115, 0.2379, 0.9073],
        [0.4710, 0.0060, 0.1345],
        [0.8155, 0.1687, 0.1588],
        [0.5361, 0.4842, 0.7634]])


In [10]:
from torch.utils.data import DataLoader

In [13]:
data = DataLoader(a, batch_size=3)
"""
This loads the whole dataset but in small chunks that can be used as mini-batches
"""
for i, data in enumerate(data):
    print(f"{1}) {data}")

1) tensor([[0.6116, 0.5117, 0.5136],
        [0.2954, 0.8412, 0.4780],
        [0.7043, 0.4535, 0.3383]])
1) tensor([[0.5575, 0.9692, 0.1305],
        [0.2817, 0.1753, 0.0334],
        [0.1222, 0.3172, 0.3533]])
1) tensor([[0.4115, 0.2379, 0.9073],
        [0.4710, 0.0060, 0.1345],
        [0.8155, 0.1687, 0.1588]])
1) tensor([[0.5361, 0.4842, 0.7634]])


## Creating a custom inheritance of pytorch of Dataset

In [19]:
# Joining 2 data set
# We need to create a custom joinDataset class that inheritate Dataset

from torch.utils.data import Dataset

class joinDataset(Dataset):
    """
    When we Create inheritance of Dataset class we need to have these 3 compulsory methods:
    1. __init__
    2. __len__
    3. __getitem__
    """
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [29]:
# creating datasets or tensors to join
d1 = torch.zeros((2,3)) # think this is feature dataset
d2 = torch.zeros((2)) # and this is target dataset

dataset = joinDataset(d1, d2)

In [30]:
for i in dataset:
    print(i)

(tensor([0., 0., 0.]), tensor(0.))
(tensor([0., 0., 0.]), tensor(0.))
