In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [2]:
batch_size = 100
num_classes = 10
epochs = 75
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### LOAD DATA AND TRANSFORM

In [3]:
# The output of torchvision datasets are PILImage images of range [0, 1].
# https://pytorch.org/docs/stable/torchvision/transforms.html
train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    torchvision.transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),
    torchvision.transforms.ToTensor()
])

trainset = torchvision.datasets.CIFAR10('./data', train=True, download=True, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

Files already downloaded and verified


In [4]:
test_transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

testset = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=test_transforms)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified


In [5]:
x_batch, y_batch = iter(train_loader).next()
print("Training set: {} samples - Max value: {} - Min value: {}".format(len(train_loader.dataset), 
                                                                        x_batch.max(), x_batch.min()))

Training set: 50000 samples - Max value: 1.0 - Min value: 0.0


In [6]:
x_batch, y_batch = iter(test_loader).next()
print("Test set: {} samples - Max value: {} - Min value: {}".format(len(test_loader.dataset), 
                                                                        x_batch.max(), x_batch.min()))

Test set: 10000 samples - Max value: 1.0 - Min value: 0.0


In [7]:
print("Example batch shape: {}".format(x_batch.shape))

Example batch shape: torch.Size([100, 3, 32, 32])


#### Net

In [8]:
class GaussianNoise(nn.Module):
    """Gaussian noise regularizer.

    Args:
        sigma (float, optional): relative standard deviation used to generate the
            noise. Relative means that it will be multiplied by the magnitude of
            the value your are adding the noise to. This means that sigma can be
            the same regardless of the scale of the vector.
        is_relative_detach (bool, optional): whether to detach the variable before
            computing the scale of the noise. If `False` then the scale of the noise
            won't be seen as a constant but something to optimize: this will bias the
            network to generate vectors with smaller values.
    """

    def __init__(self, sigma=0.1, is_relative_detach=True):
        super().__init__()
        self.sigma = sigma
        self.is_relative_detach = is_relative_detach
        self.noise = torch.tensor(0).to(device).float()

    def forward(self, x):
        if self.training and self.sigma != 0:
            scale = self.sigma * x.detach() if self.is_relative_detach else self.sigma * x
            sampled_noise = self.noise.repeat(*x.size()).normal_() * scale
            x = x + sampled_noise
        return x 

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.block1 = self._CBGN(3, 32)
        self.block2 = self._CBGN(32, 64)
        self.block3 = self._CBGN(64, 128)
        self.block4 = self._CBGN(128, 256)
        self.block5 = self._CBGN(256, 512)
        # Flatten at forward!
        self.features = nn.Linear(512, 512)
        self.reluFeatures = nn.ReLU()
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        out = self.block1(x)               # 32x32 -> 16x16 @ 32
        out = self.block2(out)             # 16x16 -> 8x8 @ 64
        out = self.block3(out)             # 8x8 -> 4x4 @ 128
        out = self.block4(out)             # 4x4 -> 2x2 @ 256
        out = self.block5(out)             # 2x2 -> 1x1 @ 512
        out = out.view(out.size(0), -1)    # Flatten
        out = self.reluFeatures(self.features(out))  # 512 -> 512
        out = self.classifier(out)         # 512 -> num_classes (10)
        return out

    # DEF A BLOCK Conv + BN + GN + MaxPool
    def _CBGN(self, in_channels, filters):
        layers = []
        layers += [nn.Conv2d(in_channels, filters, kernel_size=(3,3), padding=1)]
        layers += [nn.BatchNorm2d(filters)]
        layers += [GaussianNoise(0.3)]
        layers += [nn.ReLU()]
        layers += [nn.MaxPool2d(kernel_size=(2,2))]
        return nn.Sequential(*layers)


net = Net().to(device)
print(net)

Net(
  (block1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GaussianNoise()
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GaussianNoise()
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GaussianNoise()
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (block4): S

### Training

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-6)

In [11]:
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[25, 50], gamma=0.1)
# lr = 0.1     at epoch < 25
# lr = 0.01    30 <= epoch < 50
# lr = 0.001   epoch >= 50

In [12]:
print("\n---- Start Training ----")
best_accuracy = -1
for epoch in range(epochs):

    # TRAIN THE NETWORK
    train_loss, train_correct = 0, 0
    net.train()
    for inputs, targets in train_loader:
        # data is a list of [inputs, labels]
        inputs, targets = inputs.to(device), targets.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        _, pred = outputs.max(1)  # get the index of the max log-probability
        train_correct += pred.eq(targets).sum().item()

        # print statistics
        train_loss += loss.item()
        
    train_loss /= len(train_loader.dataset)

    # TEST NETWORK
    net.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            test_loss += criterion(outputs, targets)
            _, pred = outputs.max(1)  # get the index of the max log-probability
            correct += pred.eq(targets).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    
    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']
        
    print("[Epoch {}] LR: {:.3f} - Train Loss: {:.5f} - Test Loss: {:.5f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(epoch+1, current_lr, train_loss, test_loss, 100. * train_correct / len(train_loader.dataset), test_accuracy))
    
    if test_accuracy>best_accuracy:
        best_accuracy = test_accuracy
    
    scheduler.step()
    
print('Finished Training')
print("Best Test accuracy: {:.2f}".format(best_accuracy))


---- Start Training ----
[Epoch 1] LR: 0.100 - Train Loss: 0.01577 - Test Loss: 0.01614 - Train Accuracy: 41.42% - Test Accuracy: 41.69%
[Epoch 2] LR: 0.100 - Train Loss: 0.01230 - Test Loss: 0.01789 - Train Accuracy: 55.44% - Test Accuracy: 40.38%
[Epoch 3] LR: 0.100 - Train Loss: 0.01085 - Test Loss: 0.01558 - Train Accuracy: 61.01% - Test Accuracy: 48.92%
[Epoch 4] LR: 0.100 - Train Loss: 0.00986 - Test Loss: 0.01205 - Train Accuracy: 64.77% - Test Accuracy: 56.29%
[Epoch 5] LR: 0.100 - Train Loss: 0.00915 - Test Loss: 0.01244 - Train Accuracy: 67.55% - Test Accuracy: 56.63%
[Epoch 6] LR: 0.100 - Train Loss: 0.00865 - Test Loss: 0.01205 - Train Accuracy: 69.33% - Test Accuracy: 57.97%
[Epoch 7] LR: 0.100 - Train Loss: 0.00826 - Test Loss: 0.02198 - Train Accuracy: 70.80% - Test Accuracy: 36.47%
[Epoch 8] LR: 0.100 - Train Loss: 0.00796 - Test Loss: 0.01275 - Train Accuracy: 71.85% - Test Accuracy: 57.34%
[Epoch 9] LR: 0.100 - Train Loss: 0.00766 - Test Loss: 0.01160 - Train Accurac

[Epoch 74] LR: 0.001 - Train Loss: 0.00414 - Test Loss: 0.00470 - Train Accuracy: 85.36% - Test Accuracy: 83.75%
[Epoch 75] LR: 0.001 - Train Loss: 0.00416 - Test Loss: 0.00463 - Train Accuracy: 85.23% - Test Accuracy: 84.19%
Finished Training
Best Test accuracy: 84.31
