In [1]:
# Put the directory two levels above the working directory at the front of the
# module search path so the project-local imports in the next cell resolve
# (presumably that is where nn_extrapolation and nn_utils live — TODO confirm).
import sys
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Shared training/evaluation helper (Trainer comes from the nn_utils star import).
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA.
# NLLLoss expects log-probabilities; the model cell appends a LogSoftmax layer
# to the classifier for that reason.
# NOTE(review): the validation loss uses reduction="sum"; presumably Trainer
# divides by the number of samples afterwards — confirm in nn_utils.
trainer = Trainer(
    device="cuda" if torch.cuda.is_available() else "cpu",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [4]:
# Build the CIFAR-10 data loaders; later cells index the result with "train"
# and "valid".
# The data is cached in a per-user directory under /tmp. The user name is read
# from USER, falling back to USERNAME and finally to a fixed placeholder, so
# the cell no longer raises KeyError where USER is not defined. The path is
# unchanged whenever USER is set.
# NOTE(review): `transforms` arrives via the nn_utils star import; presumably it
# is torchvision.transforms — confirm.
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join(
        "/tmp",
        os.environ.get("USER", os.environ.get("USERNAME", "unknown")),
        "CIFAR",
    ),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# VGG-19 with randomly initialised weights. Calling vgg19() without arguments
# gives an untrained network under both the old (`pretrained=`) and the new
# (`weights=`) torchvision API, which avoids the deprecated keyword.
model = models.vgg19()
# Replace the final classifier layer with a 10-class head; the input width is
# read from the layer being replaced instead of being hard-coded.
head_in_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(head_in_features, 10)
# Emit log-probabilities so the network pairs with the NLLLoss used by the trainer.
model.classifier.add_module("7", nn.LogSoftmax(-1))
# Snapshot the initial weights before the model is moved to the trainer's
# device; every experiment below restarts from this state.
initial_state = deepcopy(model.state_dict())
# Last expression: the module repr is shown as the cell output.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [6]:
# Total number of scalar parameters in the model (displayed as the cell output).
sum(param.numel() for param in model.parameters())

139611210

In [7]:
# Pin the initial weights across kernel restarts: the first run writes the
# freshly initialised state to disk, every later run loads that file and
# overwrites the model's random initialisation with it.
state_path = "vgg19_initial_state.pth"
if not os.path.exists(state_path):
    # First run: persist the snapshot taken when the model was created.
    torch.save(initial_state, state_path)
else:
    # Later runs: reuse the stored snapshot as the common starting point.
    initial_state = torch.load(state_path)
    model.load_state_dict(initial_state)
    model.to(trainer.device)

In [8]:
# Baseline score of the untrained network on the validation split. Later cells
# unpack the returned tuple as (accuracy, loss); with 10 classes, chance level
# is about 0.1 accuracy and a loss of about ln(10) ≈ 2.303.
trainer.validation(model, dl["valid"])

(0.1043, 2.302760050201416)

## Momentum

In [9]:
# Optimizer and log file for the momentum experiment.
# NOTE(review): the second positional argument (1e-2) is presumably the initial
# learning rate — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-16,
)
logger = Logger("vgg19_log_augmentation.txt.no_resizing")

In [10]:
# First training phase of the momentum experiment: 30 epochs from the initial
# weights, with a fixed seed so the runs in this notebook are comparable.
torch.manual_seed(2020)
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Called once per epoch; presumably lets the optimizer record this epoch's
    # iterate for the later accelerate() call — confirm in nn_extrapolation.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:23<00:00, 13.14it/s, loss=2.1581]
Epoch 1 | Training loss: 2.1581, validation accuracy: 0.2607, validation loss: 1.8690
100%|██████████████████████████████████████████████████████████| 313/313 [00:23<00:00, 13.04it/s, loss=1.8156]
Epoch 2 | Training loss: 1.8156, validation accuracy: 0.3518, validation loss: 1.6514
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 13.00it/s, loss=1.6262]
Epoch 3 | Training loss: 1.6262, validation accuracy: 0.4122, validation loss: 1.5652
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 13.02it/s, loss=1.4571]
Epoch 4 | Training loss: 1.4571, validation accuracy: 0.5369, validation loss: 1.2735
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 13.00it/s, loss=1.3112]
Epoch 5 | Training loss: 1.3112, validation accuracy: 0.5353, validation loss: 1.2947
100%|██████████

In [11]:
# Score the trained model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.90055, 0.29043524913787844)
Valid: (0.8473, 0.5097128377914428)


In [12]:
# Build the accelerated counterpart of the trained model in a separate object.
# accelerate() and store_parameters() belong to AcceleratedSGD (nn_extrapolation);
# their exact semantics are not visible in this notebook, so the statement
# order is deliberately left untouched.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [13]:
# Score the accelerated model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.921175, 0.2374023758649826)
Valid: (0.8612, 0.4318892604827881)


In [14]:
# Set the learning rate to 1e-3 for the second training phase. The value is
# applied to every parameter group, so the change cannot silently miss groups
# if the optimizer is ever built with more than one.
for param_group in optimizer.param_groups:
    param_group["lr"] = 1e-3

In [15]:
# Second training phase: continue training the same model for another 30
# epochs with the learning rate set in the previous cell. The epoch counter
# written to the log restarts at 1.
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.98it/s, loss=0.2212]
Epoch 1 | Training loss: 0.2212, validation accuracy: 0.8644, validation loss: 0.4681
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.96it/s, loss=0.1975]
Epoch 2 | Training loss: 0.1975, validation accuracy: 0.8688, validation loss: 0.4674
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.96it/s, loss=0.1848]
Epoch 3 | Training loss: 0.1848, validation accuracy: 0.8667, validation loss: 0.4718
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.95it/s, loss=0.1762]
Epoch 4 | Training loss: 0.1762, validation accuracy: 0.8674, validation loss: 0.4825
100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.94it/s, loss=0.1723]
Epoch 5 | Training loss: 0.1723, validation accuracy: 0.8687, validation loss: 0.4799
100%|██████████

In [16]:
# Score the model after the second training phase on both splits (train
# first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.967225, 0.0931658944427967)
Valid: (0.8677, 0.5588255859375)


In [17]:
# Rebuild the accelerated model from the weights reached after the second
# training phase. accelerate() and store_parameters() belong to AcceleratedSGD;
# their semantics are not visible here, so the statement order is left as is.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [18]:
# Score the accelerated model obtained after the second phase on both splits
# (train first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9698, 0.08657128766775131)
Valid: (0.872, 0.5348740166664123)


## Epoch average

In [19]:
# Start the epoch-average experiment from the same initial weights as the
# previous run: restore the saved initial state, then make sure the model is on
# the trainer's device.
model.load_state_dict(initial_state)
model.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [20]:
# Sanity check after the reset: the result should match the untrained baseline
# reported by the first validation call in this notebook.
trainer.validation(model, dl["valid"])

(0.1043, 2.302760050201416)

In [21]:
# Optimizer and log file for the epoch-average experiment (mode="epoch_avg"
# with the optimizer's default averaging settings).
# NOTE(review): the second positional argument (1e-2) is presumably the initial
# learning rate — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
)
logger = Logger("vgg19_log_augmentation_averaging.txt")

In [22]:
# First training phase of the epoch-average experiment: 30 epochs from the
# initial weights, using the same seed as the other experiments.
torch.manual_seed(2020)
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Called once per epoch; presumably lets the optimizer record this epoch's
    # iterate for the later accelerate() call — confirm in nn_extrapolation.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.25it/s, loss=2.1603]
Epoch 1 | Training loss: 2.1603, validation accuracy: 0.2356, validation loss: 2.0151
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.25it/s, loss=1.8539]
Epoch 2 | Training loss: 1.8539, validation accuracy: 0.3711, validation loss: 1.6104
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.27it/s, loss=1.6423]
Epoch 3 | Training loss: 1.6423, validation accuracy: 0.4204, validation loss: 1.5605
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.28it/s, loss=1.4735]
Epoch 4 | Training loss: 1.4735, validation accuracy: 0.5045, validation loss: 1.3389
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=1.3426]
Epoch 5 | Training loss: 1.3426, validation accuracy: 0.5247, validation loss: 1.3229
100%|██████████

In [23]:
# Score the trained model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8793, 0.37251731624603274)
Valid: (0.8329, 0.5513193576812744)


In [24]:
# Build the accelerated counterpart of the trained model in a separate object.
# accelerate() and store_parameters() belong to AcceleratedSGD (nn_extrapolation);
# their exact semantics are not visible in this notebook, so the statement
# order is deliberately left untouched.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [25]:
# Score the accelerated model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7666, 0.7863286228179932)
Valid: (0.7675, 0.7459545360565185)


In [26]:
# Set the learning rate to 1e-3 for the second training phase. The value is
# applied to every parameter group, so the change cannot silently miss groups
# if the optimizer is ever built with more than one.
for param_group in optimizer.param_groups:
    param_group["lr"] = 1e-3

In [27]:
# Second training phase: continue training the same model for another 30
# epochs with the learning rate set in the previous cell. The epoch counter
# written to the log restarts at 1.
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.31it/s, loss=0.2295]
Epoch 1 | Training loss: 0.2295, validation accuracy: 0.8650, validation loss: 0.4513
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.32it/s, loss=0.2052]
Epoch 2 | Training loss: 0.2052, validation accuracy: 0.8638, validation loss: 0.4576
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.31it/s, loss=0.1912]
Epoch 3 | Training loss: 0.1912, validation accuracy: 0.8671, validation loss: 0.4484
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.32it/s, loss=0.1849]
Epoch 4 | Training loss: 0.1849, validation accuracy: 0.8667, validation loss: 0.4588
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.32it/s, loss=0.1783]
Epoch 5 | Training loss: 0.1783, validation accuracy: 0.8683, validation loss: 0.4611
100%|██████████

In [28]:
# Score the model after the second training phase on both splits (train
# first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.967225, 0.0944846766114235)
Valid: (0.8708, 0.53550360622406)


In [29]:
# Rebuild the accelerated model from the weights reached after the second
# training phase. accelerate() and store_parameters() belong to AcceleratedSGD;
# their semantics are not visible here, so the statement order is left as is.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [30]:
# Score the accelerated model obtained after the second phase on both splits
# (train first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.933925, 0.20556061109304427)
Valid: (0.8611, 0.42290406703948974)


## Epoch average, span = 10

In [9]:
# Start the span-10 averaging experiment from the same initial weights as the
# other runs: restore the saved initial state, then make sure the model is on
# the trainer's device.
model.load_state_dict(initial_state)
model.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [10]:
# Sanity check after the reset: the result should match the untrained baseline
# reported by the first validation call in this notebook.
trainer.validation(model, dl["valid"])

(0.1043, 2.302760050201416)

In [11]:
# Optimizer and log file for the epoch-average experiment with span 10:
# avg_alpha is derived from the span as 2 / (span + 1).
# NOTE(review): the second positional argument (1e-2) is presumably the initial
# learning rate — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=2 / (10 + 1),
)
logger = Logger("vgg19_log_augmentation_averaging_span10.txt")

In [12]:
# First training phase of the span-10 experiment: 30 epochs from the initial
# weights, using the same seed as the other experiments.
torch.manual_seed(2020)
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Called once per epoch; presumably lets the optimizer record this epoch's
    # iterate for the later accelerate() call — confirm in nn_extrapolation.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.44it/s, loss=2.1706]
Epoch 1 | Training loss: 2.1706, validation accuracy: 0.2560, validation loss: 1.9420
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.41it/s, loss=1.8320]
Epoch 2 | Training loss: 1.8320, validation accuracy: 0.3525, validation loss: 1.6695
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.36it/s, loss=1.6395]
Epoch 3 | Training loss: 1.6395, validation accuracy: 0.4333, validation loss: 1.4827
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.31it/s, loss=1.4762]
Epoch 4 | Training loss: 1.4762, validation accuracy: 0.5280, validation loss: 1.3015
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.31it/s, loss=1.3411]
Epoch 5 | Training loss: 1.3411, validation accuracy: 0.5597, validation loss: 1.2226
100%|██████████

In [13]:
# Score the trained model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.88835, 0.33212430019378664)
Valid: (0.8327, 0.5609132087707519)


In [14]:
# Build the accelerated counterpart of the trained model in a separate object.
# accelerate() and store_parameters() belong to AcceleratedSGD (nn_extrapolation);
# their exact semantics are not visible in this notebook, so the statement
# order is deliberately left untouched.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [15]:
# Score the accelerated model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.922725, 0.23121466193199158)
Valid: (0.8666, 0.4086621653556824)


In [16]:
# Set the learning rate to 1e-3 for the second training phase. The value is
# applied to every parameter group, so the change cannot silently miss groups
# if the optimizer is ever built with more than one.
for param_group in optimizer.param_groups:
    param_group["lr"] = 1e-3

In [17]:
# Second training phase: continue training the same model for another 30
# epochs with the learning rate set in the previous cell. The epoch counter
# written to the log restarts at 1.
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.2220]
Epoch 1 | Training loss: 0.2220, validation accuracy: 0.8669, validation loss: 0.4585
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.1960]
Epoch 2 | Training loss: 0.1960, validation accuracy: 0.8669, validation loss: 0.4579
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.1841]
Epoch 3 | Training loss: 0.1841, validation accuracy: 0.8668, validation loss: 0.4567
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.30it/s, loss=0.1752]
Epoch 4 | Training loss: 0.1752, validation accuracy: 0.8682, validation loss: 0.4570
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.30it/s, loss=0.1712]
Epoch 5 | Training loss: 0.1712, validation accuracy: 0.8680, validation loss: 0.4616
100%|██████████

In [18]:
# Score the model after the second training phase on both splits (train
# first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.969525, 0.09006820015907288)
Valid: (0.8661, 0.5400399757385254)


In [19]:
# Rebuild the accelerated model from the weights reached after the second
# training phase. accelerate() and store_parameters() belong to AcceleratedSGD;
# their semantics are not visible here, so the statement order is left as is.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [20]:
# Score the accelerated model obtained after the second phase on both splits
# (train first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.970475, 0.0864874561548233)
Valid: (0.8693, 0.5135772533416748)


## Epoch average, alpha = 0.9

In [21]:
# Start the alpha = 0.9 averaging experiment from the same initial weights as
# the other runs: restore the saved initial state, then make sure the model is
# on the trainer's device.
model.load_state_dict(initial_state)
model.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [22]:
# Sanity check after the reset: the result should match the untrained baseline
# reported by the first validation call in this notebook.
trainer.validation(model, dl["valid"])

(0.1043, 2.302760050201416)

In [23]:
# Optimizer and log file for the epoch-average experiment with avg_alpha=0.9.
# NOTE(review): the second positional argument (1e-2) is presumably the initial
# learning rate — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=0.9,
)
logger = Logger("vgg19_log_augmentation_averaging_exp09.txt")

In [24]:
# First training phase of the alpha = 0.9 experiment: 30 epochs from the
# initial weights, using the same seed as the other experiments.
torch.manual_seed(2020)
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Called once per epoch; presumably lets the optimizer record this epoch's
    # iterate for the later accelerate() call — confirm in nn_extrapolation.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.30it/s, loss=2.1632]
Epoch 1 | Training loss: 2.1632, validation accuracy: 0.2577, validation loss: 1.9082
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=1.8213]
Epoch 2 | Training loss: 1.8213, validation accuracy: 0.3578, validation loss: 1.6476
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.28it/s, loss=1.6168]
Epoch 3 | Training loss: 1.6168, validation accuracy: 0.4345, validation loss: 1.4737
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=1.4615]
Epoch 4 | Training loss: 1.4615, validation accuracy: 0.5441, validation loss: 1.2535
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.28it/s, loss=1.3174]
Epoch 5 | Training loss: 1.3174, validation accuracy: 0.5536, validation loss: 1.2526
100%|██████████

In [25]:
# Score the trained model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.88865, 0.3337964326858521)
Valid: (0.8391, 0.528892395401001)


In [26]:
# Build the accelerated counterpart of the trained model in a separate object.
# accelerate() and store_parameters() belong to AcceleratedSGD (nn_extrapolation);
# their exact semantics are not visible in this notebook, so the statement
# order is deliberately left untouched.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [27]:
# Score the accelerated model on the training and validation splits (in that
# order), then write both results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.92125, 0.23343262457847594)
Valid: (0.8607, 0.42399153881073)


In [28]:
# Set the learning rate to 1e-3 for the second training phase. The value is
# applied to every parameter group, so the change cannot silently miss groups
# if the optimizer is ever built with more than one.
for param_group in optimizer.param_groups:
    param_group["lr"] = 1e-3

In [29]:
# Second training phase: continue training the same model for another 30
# epochs with the learning rate set in the previous cell. The epoch counter
# written to the log restarts at 1.
epochs = 30

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    metrics = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch+1, "|", metrics)

100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.2262]
Epoch 1 | Training loss: 0.2262, validation accuracy: 0.8628, validation loss: 0.4695
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.1957]
Epoch 2 | Training loss: 0.1957, validation accuracy: 0.8639, validation loss: 0.4678
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.30it/s, loss=0.1856]
Epoch 3 | Training loss: 0.1856, validation accuracy: 0.8649, validation loss: 0.4617
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.1799]
Epoch 4 | Training loss: 0.1799, validation accuracy: 0.8676, validation loss: 0.4651
100%|██████████████████████████████████████████████████████████| 313/313 [00:27<00:00, 11.29it/s, loss=0.1732]
Epoch 5 | Training loss: 0.1732, validation accuracy: 0.8676, validation loss: 0.4752
100%|██████████

In [30]:
# Score the model after the second training phase on both splits (train
# first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.970425, 0.0891860221505165)
Valid: (0.8709, 0.5481557329177856)


In [31]:
# Rebuild the accelerated model from the weights reached after the second
# training phase. accelerate() and store_parameters() belong to AcceleratedSGD;
# their semantics are not visible here, so the statement order is left as is.
model_acc = deepcopy(model)  # independent copy of the trained model
optimizer.accelerate()  # presumably computes the extrapolated parameters — confirm
optimizer.store_parameters([model_acc.parameters()])  # presumably writes them into model_acc — confirm
model_acc.to(trainer.device)  # last expression: the module repr is the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [32]:
# Score the accelerated model obtained after the second phase on both splits
# (train first, then valid) and write the results to the run log.
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.96985, 0.08826465769410133)
Valid: (0.8716, 0.5232996897697448)
