# Transforms

* Data does not always come in the final processed form required for training machine learning algorithms.

* We use `transforms` to perform some manipulation of the data and make it suitable for training.

In [1]:
# FashionMNIST dataset features are in PIL Image format, and the labels are integers.
# For training, we need the features as normalized tensors, and the labels as one-hot encoded tensors.
# To make these transformations, we use the ToTensor and Lambda transforms.
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

In [11]:
def _label_to_one_hot(y):
    """Return a float tensor of length 10 holding 1 at index ``y`` and 0 elsewhere."""
    encoded = torch.zeros(10, dtype=torch.float)
    # scatter_ writes in place and returns the same tensor, so it can be returned directly.
    return encoded.scatter_(dim=0, index=torch.tensor(y), value=1)


# Training split of FashionMNIST: images go through ToTensor, labels are one-hot encoded.
# NOTE(review): `root` is an absolute, machine-specific path — adjust before running elsewhere.
ds = datasets.FashionMNIST(
    root="/home/hslee/Desktop/Datasets",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=Lambda(_label_to_one_hot),
)

# target_transform=Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1))
# --> it first creates a zero tensor of size 10 (the number of labels in our dataset)
# and then calls scatter_, which assigns value=1 at the index given by the label y.

In [8]:
# Worked example of Tensor.scatter_(dim, index, value) on a small batch of labels.
# Reference: https://pytorch.org/docs/stable/generated/torch.Tensor.scatter_.html

# Start from an all-zero (5 samples x 10 classes) matrix.
one_hot = torch.zeros(5, 10)
print(one_hot)

# Reshape the five labels into a (5, 1) column so the index has the same
# number of dimensions as the tensor being scattered into.
label = torch.tensor([3, 4, 5, 6, 7]).view(-1, 1)
print(label)

# In place, along dim=1: row i receives a 1 at column label[i].
one_hot.scatter_(1, label, 1)
print(one_hot)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([[3],
        [4],
        [5],
        [6],
        [7]])
tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]])
