# PyTorch Notes
This file contains examples and explanations of PyTorch code.

In [1]:
# Imports

import torch # Base package
from torch import nn # Neural Network package
from torch.utils.data import DataLoader # Useful tool for loading data and passing it to models
from torchvision.transforms import ToTensor # Converts image data to tensors (Which are what pytorch uses) 
from torchvision.transforms import Lambda # Allows us to apply our own transformations using lambda functions
from torchvision.transforms import Compose # Used to chain together transformations

from torchvision import datasets # Free data to tinker with
print("All imports OK")

All imports OK


## Loading data

In [17]:
# We'll just use some freely available data: the FashionMNIST image dataset.
# download=True only fetches the files when they are missing from `root`;
# if the dataset is already there it is not downloaded again. This keeps the
# cell runnable on a fresh checkout where the "data" folder does not exist yet.
training_data = datasets.FashionMNIST(
    root="data", # Data folder location
    train=True, # This will be our training data
    download=True, # Downloads on the first run; skipped once the files exist
    transform=ToTensor(), # Pytorch uses tensor objects, so may as well transform the data now
)

test_data = datasets.FashionMNIST(
    root="data", # Same folder as the training split
    train=False, # This will not be our training data
    download=True, # Same behaviour as above: no re-download if already present
    transform=ToTensor(),
)


# A DataLoader wraps a dataset and serves it up one batch at a time.
# Typical usage: pick a batch size, then build one loader per dataset split.
size_of_batch = 11 # Number of data samples in each batch of data
train_dataloader = DataLoader(dataset=training_data, batch_size=size_of_batch)
test_dataloader = DataLoader(dataset=test_data, batch_size=size_of_batch)

In [20]:
# Peek at a single batch to find out the shape of our data.
# By convention X names the input data and y the target result.
first_batch = next(iter(test_dataloader), None) # None when the loader yields nothing
if first_batch is not None: # One batch is enough; nothing is printed for an empty loader
    X, y = first_batch
    print("Shape of X [Size of each batch, Colours, Height, Width]: \n", X.shape)
    print("Shape of y: ", y.shape, y.dtype)

Shape of X [Size of each batch, Colours, Height, Width]: 
 torch.Size([11, 1, 28, 28])
Shape of y:  torch.Size([11]) torch.int64


## Custom data loaders

In [None]:
# Classes can be used to make our own data loaders.
# This becomes increasingly necessary as we deal with larger and more complex problems,
# as we will be unable to just load everything into our machine's memory at once.
# It also lets us write methods tailored to a specific problem.

# Todo : find some data and make a custom loader for it

## Preprocessing
Preprocessing depends heavily on the dataset you are using, but common choices are:
- Normalisation of the input data (referred to as batch normalisation in these notes), practically always helpful to do
- Image cropping
- Image translation
- Image rotations/flips
- Contrast changes

In [None]:
# Let's just do batch normalisation