The loss function
 - quantifies how far we are from the ideal state, in which the network makes no mistakes and has perfect confidence in its answers.
 - For image classification, the most common loss function is the Categorical Cross-Entropy (CCE) loss.

Transformation

In [None]:
import torchvision.transforms as T
from torchvision import datasets

## T.Compose creates a pipeline where the provided
## transformations are run in sequence on every sample
transforms = T.Compose(
    [
        # This transform takes a np.ndarray or a PIL image of integers
        # in the range 0-255 and converts it to a float tensor in the
        # range 0.0 - 1.0
        T.ToTensor(),
        # This then renormalizes the tensor as (x - mean) / std, which with
        # mean=0.5 and std=0.5 maps 0.0 - 1.0 onto -1.0 - 1.0, a better
        # range for modern activation functions like ReLU. Mean and std
        # are per-channel sequences; MNIST images have a single channel,
        # hence the one-element tuples.
        T.Normalize((0.5,), (0.5,)),
    ]
)

## train=True selects the training split and train=False the test split;
## the same transform pipeline is applied to both
train_data = datasets.MNIST(
    root="data", train=True, download=True, transform=transforms
)

test_data = datasets.MNIST(
    root="data", train=False, download=True, transform=transforms
)

In [None]:
## train=True selects the training split of MNIST; train=False would select
## the test split instead
train_data = datasets.MNIST(
    "data", train=True, transform=transforms, download=True
)

## batch_size is the size of each mini-batch used for stochastic gradient
## descent; num_workers is the number of processes that PyTorch should use
## to load the data
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True,
)

A good rule of thumb: set the number of workers equal to the number of CPUs available on the current machine.

In [None]:
import multiprocessing

## One worker per CPU reported by the operating system
n_workers = multiprocessing.cpu_count()

## The DataLoader cells in this notebook pass `num_workers=num_workers`,
## so expose the same value under that name as well
num_workers = n_workers

In [None]:
## Walk through the loader one mini-batch at a time; each step is unpacked
## into a batch of images and the matching batch of labels
for image_batch, label_batch in train_loader:
    ...  # do something with image_batch and label_batch

In [None]:
## Get an iterator from the dataloader
dataiter = iter(train_loader)
## Get the next batch. Use the built-in next(): the iterator's `.next()`
## method is a Python 2 leftover that recent PyTorch releases no longer
## provide.
image_batch, label_batch = next(dataiter)

Splitting the training data into training and validation sets

In [None]:
## Size of the full dataset that is about to be split
n_samples = len(trainval_data)

## 80% of the samples (rounded down) stay in the training subset ...
train_len = int(n_samples * 0.8)

## ... and whatever is left becomes the validation subset
val_len = n_samples - train_len

## Randomly assign every sample to exactly one of the two subsets
train_subset, val_subset = torch.utils.data.random_split(
    trainval_data,
    [train_len, val_len],
)

## The subsets behave like ordinary datasets, so each gets its own loader.
## Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    train_subset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True,
)

val_loader = torch.utils.data.DataLoader(
    val_subset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)