In [None]:
"""I Target： Here we learn how to make dataset

II Definition: In Torch, Dataset is not actual a images-set, it's a defined class, if you want to design your own
 Dataset, you should inherit from torch.utils.data.Dataset. Actually, you can get images and labels by __getitem__,
 where they store in torch.utils.data.Dataset.
    PyTorch provides two data primitives: torch.utils.data.DataLoader and torch.utils.data.Dataset that allow you to use pre-loaded datasets as well as your own data. 
    Dataset stores the samples and their corresponding labels, 
    and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.

III Instances:
    1.0 torch.utils.Data.Dataset, learn by random sample
    1.1 __getitem__, __len__
    1.2 Dataset Exsample(MNIST) and Transforms(Augumentation) 
    1.3 Other tasks' Dataset (VOCSegmentation, VOCDetection)
    1.4 Make Dataset from own ImageFolder
    1.4.1 classes, class to id

IV Compare 2 then Generalize

V Test in New instance 
"""

In [8]:
import torch
import torchvision
import os 
from torch.utils.data import Dataset

In [7]:
def random_sample():
    """1.0&1.1  Generate random x&y Dataset to show function usage of __getitem__, __len__
    """

  
    x = torch.linspace(-1, 1, 10)
    y = x**2

    class SimpleDataset(Dataset): # must inherit from Dataset to get its initialization

        def __init__(self, x, y):
            super().__init__() 
            self.x =x
            self.y = y

        def __getitem__(self, index):
            return {"x":self.x[index], "y":self.y[index]}

        def __len__(self):
            return len(self.x)

    simpledataset = SimpleDataset(x, y) # instantiation
    index = 0

    # you will find output of simpledataset.__getitem__(index)) equal to simpledataset[index],
    #  which means dataset is also functional
    print("simpleDataset.__getitem__({}):{}".format(index, simpledataset.__getitem__(index)))
    print("simpledataset：{}".format(simpledataset[index]))

    print("SimpleDataset.__len__():{}".format(simpledataset.__len__()))
    print("simpledataset：{}".format(len(simpledataset)))






simpleDataset.__getitem__(0):{'x': tensor(-1.), 'y': tensor(1.)}
simpledataset：{'x': tensor(-1.), 'y': tensor(1.)}
SimpleDataset.__len__():10
simpledataset：10


In [1]:
def transform():
    """1.2 Here we learn how to do transform on Dataset
    """

    from torchvision.datasets import MNIST
    from torchvision import transforms

    transform = transforms.Compose(
        [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,))
        ]
    )   

    train_dataset = MNIST(root= "/home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist",
                    train= True, 
                    transform=transform,
                    target_transform=None,
                    download=True, # False
    )

    index = 0 
    print(f"type(train_dataset[{index}]:{train_dataset[index]})")
    print(f"type(train_dataset[{index}][0]:{type(train_dataset[index][0])})")
    print("train_dataset[{}][0].shape: {}".format(index, train_dataset[index][0].shape))  # torch.Size([1, 28, 28])
    print("type(train_dataset[{}][1]): {}".format(index, type(train_dataset[index][1])))  # <class 'int'>



In [2]:
# random_sample()
transform()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/train-images-idx3-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /home/hpczeji1/hpc-work/Codebase/Datasets/data_mnist/MNIST/raw

type(train_dataset[0]:(tensor([[[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
         [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
         [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
     