<a href="https://colab.research.google.com/github/Bitdribble/dlwpt-code/blob/master/colab/PyTorchCh8_Dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Experimenting with Dropout - [Deep Learning with PyTorch](https://pytorch.org/assets/deep-learning/Deep-Learning-with-PyTorch.pdf), Chap. 8.

In [None]:
import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Compact tensor printing: show 2 items at each edge of a summarized
# dimension and wrap lines at 75 characters.
torch.set_printoptions(linewidth=75, edgeitems=2)
# Seed PyTorch's default random generator so runs are repeatable. Kept as the
# last expression of the cell, so the returned Generator is displayed.
torch.manual_seed(123)

<torch._C.Generator at 0x7f0f2b374a90>

In [None]:
# Data preparation
# Fetch the raw (untransformed) CIFAR-10 training and validation splits into
# `data_path`; `download=True` lets torchvision fetch the archive if needed.
data_path = '.'
cifar10 = datasets.CIFAR10(root=data_path, train=True, download=True)
cifar10_val = datasets.CIFAR10(root=data_path, train=False, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [None]:
# Normalize data
# Images are converted to tensors and then standardized per RGB channel with
# fixed mean / std constants (presumably the CIFAR-10 training-set statistics;
# verify before reusing them elsewhere). Both splits share the same transform.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4915, 0.4823, 0.4468),
                         std=(0.2470, 0.2435, 0.2616)),
])

transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False, transform=cifar_transform)
transformed_cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False, transform=cifar_transform)

In [None]:
# Restrict data to airplanes and birds
# `label_map` sends the original CIFAR-10 labels 0 and 2 to the contiguous
# binary labels 0 and 1; `class_names` is indexed by the new label.
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# Keep only samples whose original label has an entry in `label_map`.
cifar2 = [
    (img, label_map[label])
    for img, label in transformed_cifar10
    if label in label_map
]
cifar2_val = [
    (img, label_map[label])
    for img, label in transformed_cifar10_val
    if label in label_map
]

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")

Training on device cpu.


In [None]:
def training_loop(n_epochs, device, optimizer, model, loss_fn, train_loader, log_epochs=0):
  """Train `model` for `n_epochs` passes over `train_loader`.

  Every batch is moved to `device`, passed through `model`, scored with
  `loss_fn`, and followed by one `optimizer` step. The train/eval mode of the
  model is left untouched: call `model.train()` beforehand if layers such as
  dropout should be active.

  Args:
    n_epochs: number of passes over `train_loader`; epochs are numbered
      1..n_epochs.
    device: device the image and label batches are moved to.
    optimizer: object whose `zero_grad()` and `step()` are called once per batch.
    model: callable mapping a batch of images to outputs.
    loss_fn: callable `loss_fn(outputs, labels)` returning a scalar loss tensor.
    train_loader: iterable yielding `(imgs, labels)` tensor batches.
    log_epochs: print the mean per-batch training loss every `log_epochs`
      epochs and after the final epoch. 0 (the default) or None disables
      logging.
  """
  for epoch in range(1, n_epochs + 1):
    loss_train = 0.0
    n_batches = 0

    for imgs, labels in train_loader:
      imgs = imgs.to(device=device)
      labels = labels.to(device=device)

      outputs = model(imgs)
      loss = loss_fn(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      loss_train += loss.item()
      n_batches += 1

    # `epoch` is already 1-based, so it is used as-is for both the schedule and
    # the printed epoch number. A plain truthiness test replaces the identity
    # comparison against the literal 0.
    if log_epochs and (epoch % log_epochs == 0 or epoch == n_epochs):
      # Average over the batches actually seen; an empty loader yields nan
      # instead of a division by zero.
      mean_loss = loss_train / n_batches if n_batches else float("nan")
      print(f"{datetime.datetime.now()} Epoch {epoch}, "
            f"Training loss {mean_loss:.3f}")

def validate(model, device, train_loader, val_loader):
  """Print the classification accuracy of `model` on two data loaders.

  For each loader ("train" first, then "val") every batch is moved to
  `device`, the predicted class is taken as the index of the largest model
  output along dimension 1, and the fraction of predictions equal to the
  labels is printed with two decimals. Gradients are disabled while
  predicting. The model's train/eval mode is not changed here, so call
  `model.eval()` beforehand when layers such as dropout must be inactive.

  Args:
    model: callable mapping a batch of images to per-class outputs.
    device: device the batches are moved to.
    train_loader: iterable of `(imgs, labels)` batches reported as "train".
    val_loader: iterable of `(imgs, labels)` batches reported as "val".
  """
  loaders = {"train": train_loader, "val": val_loader}
  for name, loader in loaders.items():
    n_correct = 0
    n_seen = 0
    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
      for imgs, labels in loader:
        imgs = imgs.to(device=device)
        labels = labels.to(device=device)

        # Index of the highest output in each row is the predicted class.
        predicted = model(imgs).max(dim=1).indices

        n_seen += labels.shape[0]
        n_correct += int((predicted == labels).sum())

    print(f"Accuracy {name}: {n_correct / n_seen:.2f}")

In [None]:
# Dropout
#
# An effective strategy for combating overfitting is to zero out a random fraction
# of outputs from neurons across the network, where the randomization happens at
# each training iteration.
#
# This procedure effectively generates slightly different models with different
# neuron topologies at each iteration, giving neurons in the model less chance to
# coordinate in the memorization process that happens during overfitting. An
# alternative point of view is that dropout perturbs the features being generated
# by the model, exerting an effect that is close to augmentation, but this time
# throughout the network.
#
# We can implement dropout in a model by adding an nn.Dropout module between the
# nonlinear activation function and the linear or convolutional module of the
# subsequent layer. As an argument, we need to specify the probability with which
# inputs will be zeroed out. In the case of convolutions, we'll use the specialized
# nn.Dropout2d or nn.Dropout3d, which zero out entire channels of the input.

class NetDropout(nn.Module):
  """Small convolutional classifier with channel-wise dropout.

  Architecture: two stages of Conv2d(3x3, padding 1) -> Tanh -> MaxPool2d(2)
  -> Dropout2d, followed by Linear -> Tanh -> Linear with 2 outputs per
  sample. `fc1` is sized for 8x8 feature maps after the two poolings, i.e.
  for batched inputs of shape (N, 3, 32, 32).

  Args:
    n_chans1: number of output channels of the first convolution; the second
      convolution produces `n_chans1 // 2` channels.
    dropout_p: probability passed to both `nn.Dropout2d` layers. Defaults
      to 0.4.
  """

  def __init__(self, n_chans1=32, dropout_p=0.4):
    super().__init__()

    self.n_chans1 = n_chans1
    self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)
    self.conv1_dropout = nn.Dropout2d(p=dropout_p)

    self.conv2 = nn.Conv2d(n_chans1, n_chans1//2, kernel_size=3, padding=1)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)
    self.conv2_dropout = nn.Dropout2d(p=dropout_p)

    self.fc1 = nn.Linear(8*8*(n_chans1//2), 32)
    self.act3 = nn.Tanh()

    self.fc2 = nn.Linear(32, 2)

  def forward(self, x):
    """Return the 2 per-class outputs for a batch `x` of shape (N, 3, 32, 32)."""
    out = self.pool1(self.act1(self.conv1(x)))
    out = self.conv1_dropout(out)
    out = self.pool2(self.act2(self.conv2(out)))
    out = self.conv2_dropout(out)
    # Same effect as nn.Flatten(): keep the batch dimension and flatten the
    # rest. Unlike view(-1, n_features), an input with the wrong spatial size
    # cannot be silently folded into the batch dimension; it fails at fc1.
    out = out.flatten(start_dim=1)
    out = self.act3(self.fc1(out))
    out = self.fc2(out)
    return out

# Dropout is normally active during training, while during the evaluation of a
# trained model in production, dropout is bypassed or, equivalently, assigned a
# probability equal to zero. This is controlled through the `training` attribute
# of the Dropout module. Recall that PyTorch lets us switch between the two
# modalities by calling
#
# model.train()
#
# or
#
# model.eval()
#
# on any nn.Module subclass. The call will be automatically replicated on the
# submodules so that if Dropout is among them, it will behave accordingly in
# subsequent forward and backward passes.

In [None]:
# Build the model on the selected device, a plain SGD optimizer, a shuffled
# training loader over the two-class subset, and the classification loss.
model = NetDropout(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
train_loader = torch.utils.data.DataLoader(cifar2, shuffle=True, batch_size=64)
loss_fn = nn.CrossEntropyLoss()

model.train()  # Set train mode

training_loop(
    n_epochs=100,
    device=device,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
    log_epochs=10,
)

2022-01-09 14:59:36.375552 Epoch 10, Training loss 0.384
2022-01-09 15:01:13.238243 Epoch 20, Training loss 0.348
2022-01-09 15:02:50.374388 Epoch 30, Training loss 0.327
2022-01-09 15:04:26.399678 Epoch 40, Training loss 0.310
2022-01-09 15:06:08.071661 Epoch 50, Training loss 0.296
2022-01-09 15:07:47.258215 Epoch 60, Training loss 0.283
2022-01-09 15:09:27.363747 Epoch 70, Training loss 0.269
2022-01-09 15:11:07.062951 Epoch 80, Training loss 0.260
2022-01-09 15:12:46.736109 Epoch 90, Training loss 0.251
2022-01-09 15:14:25.225028 Epoch 100, Training loss 0.236


In [None]:
# Accuracy does not depend on sample order, so both loaders are rebuilt
# without shuffling (this rebinds `train_loader` from the training cell).
train_loader = torch.utils.data.DataLoader(cifar2, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(cifar2_val, shuffle=False, batch_size=64)

model.eval()  # Set eval mode

validate(model=model, device=device,
         train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.92
Accuracy val: 0.89
