<a href="https://colab.research.google.com/github/Bitdribble/dlwpt-code/blob/master/colab/PyTorchCh8_Layer_Normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

torch.set_printoptions(edgeitems=2, linewidth=75)
torch.manual_seed(123)

<torch._C.Generator at 0x7f7581ba7b70>

In [3]:
# Data preparation
data_path = '.'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True)
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [4]:
# Normalize data
transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))
transformed_cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

In [5]:
# Restrict data to airplanes and birds
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

cifar2 = [(img, label_map[label]) for img, label in transformed_cifar10 if label in [0, 2]]
cifar2_val = [(img, label_map[label]) for img, label in transformed_cifar10_val if label in [0, 2]]

In [6]:
device = (torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [20]:
def training_loop(n_epochs, device, optimizer, model, loss_fn, train_loader, val_loader, log_epochs=0):
  ret = {
      "loss_train" : {},
      "acc_train" : {},
      "acc_val" : {},
  }

  for epoch in range(1, n_epochs + 1):
    loss_train = 0.0

    for imgs, labels in train_loader:
      imgs = imgs.to(device=device)
      labels = labels.to(device=device)

      outputs = model(imgs)
      loss = loss_fn(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      loss_train += loss.item()

    loss_train /= len(train_loader)
    acc_train, acc_val = validate(model, device, train_loader, val_loader)

    ret["loss_train"][epoch] = loss_train
    ret["acc_train"][epoch] = acc_train
    ret["acc_val"][epoch] = acc_val

    if log_epochs is not 0 and ((epoch+1) % log_epochs == 0 or (epoch+1) == n_epochs):
      print(f"{datetime.datetime.now()} Epoch {epoch+1}, "
            f"Loss train {loss_train:.3f} Acc train {acc_train:.3f} val {acc_val:.3f}")

def validate(model, device, train_loader, val_loader):
  acc = {}

  for name, loader in [("train", train_loader), ("val", val_loader)]:
    correct = 0
    total = 0
    with torch.no_grad(): 
      for imgs, labels in loader:
        imgs = imgs.to(device=device)
        labels = labels.to(device=device)

        outputs = model(imgs)
        _, predicted = torch.max(outputs, dim=1) 

        total += labels.shape[0]
        correct += int((predicted == labels).sum())

    acc[name] = correct / total
  
  return acc["train"], acc["val"]

In [21]:
# This implements Layer Normalization 
class NetLayerNorm(nn.Module):
  def __init__(self, n_chans1=32):
    super().__init__()

    self.n_chans1 = n_chans1
    self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
    self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)

    self.conv2 = nn.Conv2d(n_chans1, n_chans1//2, kernel_size=3, padding=1)
    self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chans1//2)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)

    self.fc1 = nn.Linear(8*8*(n_chans1//2), 32)
    self.act3 = nn.Tanh()

    self.fc2 = nn.Linear(32, 2)

  def forward(self, x):
    out = self.pool1(self.act1(self.conv1_batchnorm(self.conv1(x))))
    out = self.pool2(self.act2(self.conv2_batchnorm(self.conv2(out))))
    out = out.view(-1, 8*8*(self.n_chans1//2)) # In place of nn.Flatten()
    out = self.act3(self.fc1(out))
    out = self.fc2(out)
    return out

In [22]:
model = NetLayerNorm(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)
loss_fn = nn.CrossEntropyLoss()

model.train() # Set train mode

training_loop(
    n_epochs = 20,
    device=device,
    optimizer = optimizer,
    model = model,

    loss_fn = loss_fn,
    train_loader = train_loader,
    val_loader = val_loader,
    log_epochs = 1
)

2022-01-31 00:44:34.089971 Epoch 2, Loss train 0.471 Acc train 0.825 val 0.824
2022-01-31 00:44:34.883009 Epoch 3, Loss train 0.374 Acc train 0.815 val 0.800
2022-01-31 00:44:35.660187 Epoch 4, Loss train 0.349 Acc train 0.856 val 0.849
2022-01-31 00:44:36.458632 Epoch 5, Loss train 0.328 Acc train 0.848 val 0.848
2022-01-31 00:44:37.239649 Epoch 6, Loss train 0.319 Acc train 0.807 val 0.785
2022-01-31 00:44:38.027396 Epoch 7, Loss train 0.304 Acc train 0.869 val 0.853
2022-01-31 00:44:38.808979 Epoch 8, Loss train 0.293 Acc train 0.873 val 0.868
2022-01-31 00:44:39.595071 Epoch 9, Loss train 0.286 Acc train 0.852 val 0.837
2022-01-31 00:44:40.385473 Epoch 10, Loss train 0.279 Acc train 0.871 val 0.873
2022-01-31 00:44:41.170520 Epoch 11, Loss train 0.271 Acc train 0.887 val 0.871
2022-01-31 00:44:41.949497 Epoch 12, Loss train 0.262 Acc train 0.894 val 0.880
2022-01-31 00:44:42.761823 Epoch 13, Loss train 0.258 Acc train 0.887 val 0.870
2022-01-31 00:44:43.554034 Epoch 14, Loss train 