##### Calculating Mean and Std from training set -> Standardization

$$
Z = \frac{(X - \mu)}{\sigma}
$$
$$
\mu = mean
$$
$$
\sigma = std
$$

Moved this here to keep the CNN notebook cleaner

In [1]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

In [6]:
# Images to PyTorch tensors
transform = transforms.Compose([
  transforms.ToTensor(),
])
# Get the training data from folder and transform
training_data = datasets.ImageFolder('./data/train/', transform=transform)

In [7]:
def compute_mean_std(loader):
    mean = 0.0
    std = 0.0
    total_images_count = 0

    # images is a tensor containing images, _ is ignored here but has labels
    for images, _ in loader:
        # Our images variable is currently shape [64, 3, 224, 224]
        # Last batch could have less images
        batch_samples = images.size(0)
        # From shape [64, 3, 224, 224] to [64, 3, 50176] essentially flattening the pixels into 1D
        images = images.view(batch_samples, images.size(1), -1)

        # Take mean/std of those 50176 pixel values -> [64, 3]
        # Then sum those 64 means and you now have sum of 64 images' means per color channel -> shape = [3]
        # Keep adding these for batches of 64 until we have the sum of ALL images' means per color channel.
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)

        total_images_count += batch_samples

    mean /= total_images_count
    std /= total_images_count

    return mean, std

# Load the training data using DataLoader
# DataLoader automates the process of loading batches of data
# It essentially wraps the dataset in an iterable for easy access
loader = DataLoader(training_data, batch_size=64, shuffle=False, num_workers=1)
# Get the mean and std with above function
mean, std = compute_mean_std(loader)
print(f"Computed Mean: {mean}, Computed Std: {std}")

Computed Mean: tensor([0.4737, 0.4721, 0.3989]), Computed Std: tensor([0.2029, 0.2014, 0.2066])
