## Standardizing Images

In [1]:
import time
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader


if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

In [2]:
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
random_seed = 1
learning_rate = 0.05
num_epochs = 10
batch_size = 128

# Architecture
num_classes = 10

torch.manual_seed(random_seed)

<torch._C.Generator at 0x27fe51372f0>

### data

In [3]:
train_dataset = datasets.MNIST(root='D:/work/data/Python/mnist/', 
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=batch_size,
                          num_workers=0,
                          shuffle=False)

for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


### Compute the Mean and Standard Deviation for Normalization

In [4]:
train_mean = []
train_std = []

for i, image in enumerate(train_loader, 0):
    numpy_image = image[0].numpy()
    
    # 在[batch height width]维度上求均值和方差，即一个color channel上求一个mean
    # 所以求得1个mean，对应1通道（或3个mean，对应3通道）
    batch_mean = np.mean(numpy_image, axis=(0, 2, 3))
    batch_std = np.std(numpy_image, axis=(0, 2, 3))
    
    train_mean.append(batch_mean)
    train_std.append(batch_std)

print("train_mean shape: ", np.array(train_mean).shape)

train_mean = torch.tensor(np.mean(train_mean, axis=0))
train_std = torch.tensor(np.mean(train_std, axis=0))

print('Mean:', train_mean)
print('Std Dev:', train_std)

train_mean shape:  (469, 1)
Mean: tensor([0.1307])
Std Dev: tensor([0.3077])


#### Note that

For RGB images (3 color channels), we would get 3 means and 3 standard deviations.
The transforms.ToTensor() method converts images to [0, 1] range, which is why the mean and standard deviation values are below 1.


### Standardized Dataset Loader

In [5]:
custom_transform = transforms.Compose([transforms.ToTensor(),
                                       transforms.Normalize(mean=train_mean, std=train_std)])

train_dataset = datasets.MNIST(root='D:/work/data/Python/mnist/', 
                               train=True, 
                               transform=custom_transform,
                               download=True)

test_dataset = datasets.MNIST(root='D:/work/data/Python/mnist/', 
                              train=False, 
                              transform=custom_transform)


train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=batch_size, 
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=batch_size, 
                         shuffle=False)

In [6]:
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


In [7]:
print('Channel mean:', torch.mean(images[:, 0, :, :]))
print('Channel std:', torch.std(images[:, 0, :, :]))

Channel mean: tensor(0.0097)
Channel std: tensor(1.0129)
