In [1]:
import sys
# Put the directory two levels above the working directory at the front of the
# module search path. NOTE(review): presumably that is where nn_extrapolation
# and nn_utils live — confirm.
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Separate criteria: the training loss is averaged per batch, the validation
# loss is summed per batch.
# NOTE(review): presumably Trainer normalises the summed validation loss itself — confirm.
train_criterion = nn.NLLLoss(reduction="mean")
valid_criterion = nn.NLLLoss(reduction="sum")
trainer = Trainer(device="cuda", loss_fn=train_criterion, val_loss_fn=valid_criterion)

In [4]:
# Per-user download directory under /tmp. USER can be unset (for example in
# some containers or on Windows), so fall back to USERNAME and then to a fixed
# name instead of failing with KeyError. When USER is set the path is unchanged.
dataset_owner = os.environ.get("USER", os.environ.get("USERNAME", "shared"))
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", dataset_owner, "CIFAR"),
    # Random affine augmentation: rotation up to 10 degrees, scale 0.9-1.1, shift up to 20%.
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /tmp/michal/CIFAR/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /tmp/michal/CIFAR/cifar-10-python.tar.gz to /tmp/michal/CIFAR
Files already downloaded and verified


In [5]:
# Randomly initialised VGG16. Relying on the constructor default (no pretrained
# weights) avoids the `pretrained` keyword, which newer torchvision releases
# deprecate, while giving the same result on older releases.
model = models.vgg16()
# Replace the last classifier layer with a 10-output one and append LogSoftmax
# so the network emits log-probabilities, matching the NLLLoss criteria used by the trainer.
model.classifier[6] = nn.Linear(4096, 10)
model.classifier.add_module("7", nn.LogSoftmax(-1))
# Snapshot the starting weights before the model is moved to the trainer's device.
initial_state = deepcopy(model.state_dict())
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
# Total number of scalar parameters in the network.
sum(map(torch.numel, model.parameters()))

134301514

In [7]:
# Reuse one set of initial weights across notebook runs: the first run writes
# them to disk, later runs read them back and load them into the model.
state_path = "vgg_initial_state.pth"
if not os.path.exists(state_path):
    torch.save(initial_state, state_path)
else:
    initial_state = torch.load(state_path)
    model.load_state_dict(initial_state)
    model.to(trainer.device)

In [8]:
# Baseline check of the untrained network on the validation loader. The
# returned pair is unpacked elsewhere in this notebook as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1021, 2.3028841983795165)

## Momentum

### Lambda = 1e-16

In [10]:
# Optimizer and log file for the lambda_=1e-16 run.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-16,
)
logger = Logger("vgg_log_augmentation_lambda=1e-16.txt.no_resizing")

In [11]:
# Seed torch's RNG (the same seed is used for every run in this notebook).
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:21<00:00, 14.49it/s, loss=2.0907]
Epoch 1 | Training loss: 2.0907, validation accuracy: 0.2792, validation loss: 1.8200
100%|██████████████████████████████████████████████████████████| 313/313 [00:21<00:00, 14.33it/s, loss=1.7683]
Epoch 2 | Training loss: 1.7683, validation accuracy: 0.3958, validation loss: 1.5794
100%|██████████████████████████████████████████████████████████| 313/313 [00:21<00:00, 14.33it/s, loss=1.5611]
Epoch 3 | Training loss: 1.5611, validation accuracy: 0.4686, validation loss: 1.4015
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.20it/s, loss=1.3847]
Epoch 4 | Training loss: 1.3847, validation accuracy: 0.5676, validation loss: 1.1729
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.07it/s, loss=1.2430]
Epoch 5 | Training loss: 1.2430, validation accuracy: 0.6210, validation loss: 1.0554
100%|██████████

In [12]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.94235, 0.1702565127968788)
Valid: (0.8586, 0.5058353942871093)


In [13]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [14]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.969375, 0.0969354386806488)
Valid: (0.8797, 0.4009284397125244)


In [15]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [16]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.971275, 0.09501042534708977)
Valid: (0.88, 0.40091258058547974)


In [17]:
# Set the first parameter group's learning rate to 1e-3 for the 30 additional epochs run in the next cell.
optimizer.param_groups[0]["lr"] = 1e-3

In [18]:
# Second training phase: 30 more epochs with the optimizer as currently configured.
epochs = 30

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 13.83it/s, loss=0.1141]
Epoch 1 | Training loss: 0.1141, validation accuracy: 0.8784, validation loss: 0.4729
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 13.87it/s, loss=0.0910]
Epoch 2 | Training loss: 0.0910, validation accuracy: 0.8781, validation loss: 0.4904
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 13.81it/s, loss=0.0848]
Epoch 3 | Training loss: 0.0848, validation accuracy: 0.8792, validation loss: 0.4886
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 13.81it/s, loss=0.0780]
Epoch 4 | Training loss: 0.0780, validation accuracy: 0.8798, validation loss: 0.4958
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 13.90it/s, loss=0.0778]
Epoch 5 | Training loss: 0.0778, validation accuracy: 0.8776, validation loss: 0.4865
100%|██████████

In [19]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.985675, 0.04322471878305077)
Valid: (0.8805, 0.5522485641479492)


In [20]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [21]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.98565, 0.041061229557543995)
Valid: (0.8814, 0.553578140258789)


In [22]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [23]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9871, 0.03952537299245596)
Valid: (0.8814, 0.5535913507461547)


### Lambda = 1e-8

In [24]:
# Rewind to the saved initial weights, then create the optimizer and log file
# for the lambda_=1e-8 run.
model.load_state_dict(initial_state)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
logger = Logger("vgg_log_augmentation.txt.no_resizing")

In [25]:
# Seed torch's RNG (the same seed is used for every run in this notebook).
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:21<00:00, 14.24it/s, loss=2.1057]
Epoch 1 | Training loss: 2.1057, validation accuracy: 0.2138, validation loss: 1.9433
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.21it/s, loss=1.7576]
Epoch 2 | Training loss: 1.7576, validation accuracy: 0.4141, validation loss: 1.5410
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.20it/s, loss=1.5501]
Epoch 3 | Training loss: 1.5501, validation accuracy: 0.4501, validation loss: 1.4603
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.18it/s, loss=1.3714]
Epoch 4 | Training loss: 1.3714, validation accuracy: 0.5810, validation loss: 1.1591
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.17it/s, loss=1.2295]
Epoch 5 | Training loss: 1.2295, validation accuracy: 0.6017, validation loss: 1.1007
100%|██████████

In [26]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.94135, 0.16899394929409028)
Valid: (0.8584, 0.5159762786865234)


In [27]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [28]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.97015, 0.09646140650510789)
Valid: (0.8819, 0.3910102800369263)


In [29]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [30]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9706, 0.09521213583946228)
Valid: (0.8817, 0.3910058512687683)


In [31]:
# Set the first parameter group's learning rate to 1e-3 for the 30 additional epochs run in the next cell.
optimizer.param_groups[0]["lr"] = 1e-3

In [32]:
# Second training phase: 30 more epochs with the optimizer as currently configured.
epochs = 30

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.18it/s, loss=0.1145]
Epoch 1 | Training loss: 0.1145, validation accuracy: 0.8777, validation loss: 0.4648
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.15it/s, loss=0.0924]
Epoch 2 | Training loss: 0.0924, validation accuracy: 0.8800, validation loss: 0.4734
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.13it/s, loss=0.0829]
Epoch 3 | Training loss: 0.0829, validation accuracy: 0.8779, validation loss: 0.4921
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.12it/s, loss=0.0822]
Epoch 4 | Training loss: 0.0822, validation accuracy: 0.8790, validation loss: 0.4906
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.12it/s, loss=0.0771]
Epoch 5 | Training loss: 0.0771, validation accuracy: 0.8807, validation loss: 0.4856
100%|██████████

In [33]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.985225, 0.04357198261991143)
Valid: (0.8799, 0.5692433110237122)


In [34]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [35]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.987625, 0.03726983153373003)
Valid: (0.8821, 0.5420897490501404)


In [36]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [37]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9866, 0.039733015621453524)
Valid: (0.8821, 0.5420750851631164)


### Lambda = 1e-4

In [38]:
# Rewind to the saved initial weights, then create the optimizer and log file
# for the lambda_=1e-4 run.
model.load_state_dict(initial_state)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-4,
)
logger = Logger("vgg_log_augmentation_lambda=1e-4.txt.no_resizing")

In [39]:
# Seed torch's RNG (the same seed is used for every run in this notebook).
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:21<00:00, 14.23it/s, loss=2.1114]
Epoch 1 | Training loss: 2.1114, validation accuracy: 0.2785, validation loss: 1.8693
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.18it/s, loss=1.7766]
Epoch 2 | Training loss: 1.7766, validation accuracy: 0.3857, validation loss: 1.5792
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.16it/s, loss=1.5649]
Epoch 3 | Training loss: 1.5649, validation accuracy: 0.4610, validation loss: 1.4182
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.16it/s, loss=1.3860]
Epoch 4 | Training loss: 1.3860, validation accuracy: 0.5615, validation loss: 1.2020
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.19it/s, loss=1.2405]
Epoch 5 | Training loss: 1.2405, validation accuracy: 0.5902, validation loss: 1.1372
100%|██████████

In [40]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.942475, 0.17267852897644043)
Valid: (0.861, 0.4798047291755676)


In [41]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [42]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9704, 0.09356355343461037)
Valid: (0.8815, 0.39055089263916015)


In [43]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [44]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.97135, 0.09292823143005371)
Valid: (0.8815, 0.39056040353775023)


In [45]:
# Set the first parameter group's learning rate to 1e-3 for the 30 additional epochs run in the next cell.
optimizer.param_groups[0]["lr"] = 1e-3

In [46]:
# Second training phase: 30 more epochs with the optimizer as currently configured.
epochs = 30

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.15it/s, loss=0.1124]
Epoch 1 | Training loss: 0.1124, validation accuracy: 0.8755, validation loss: 0.4796
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.11it/s, loss=0.0897]
Epoch 2 | Training loss: 0.0897, validation accuracy: 0.8764, validation loss: 0.4926
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.12it/s, loss=0.0845]
Epoch 3 | Training loss: 0.0845, validation accuracy: 0.8791, validation loss: 0.4938
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.11it/s, loss=0.0803]
Epoch 4 | Training loss: 0.0803, validation accuracy: 0.8798, validation loss: 0.4929
100%|██████████████████████████████████████████████████████████| 313/313 [00:22<00:00, 14.11it/s, loss=0.0737]
Epoch 5 | Training loss: 0.0737, validation accuracy: 0.8783, validation loss: 0.4952
100%|██████████

In [47]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.985325, 0.04265370637327433)
Valid: (0.8823, 0.560452561378479)


In [48]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [49]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.986875, 0.039506063325703146)
Valid: (0.8831, 0.5457372913360595)


In [50]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [51]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.98695, 0.038688352712243794)
Valid: (0.8831, 0.5457269618988038)


## Epoch average

In [52]:
# Rewind the network to the saved initial weights before the next experiment.
model.load_state_dict(initial_state)
# Last expression of the cell: its return value is what the notebook displays below.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [53]:
# Optimizer and log file for the mode="epoch_avg" run (lambda_=1e-8).
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
)
logger = Logger("vgg_log_augmentation_average.txt.no_resizing")

In [54]:
# Seed torch's RNG (the same seed is used for every run in this notebook).
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.27it/s, loss=2.0912]
Epoch 1 | Training loss: 2.0912, validation accuracy: 0.2706, validation loss: 1.8731
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.7733]
Epoch 2 | Training loss: 1.7733, validation accuracy: 0.3971, validation loss: 1.5606
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.5655]
Epoch 3 | Training loss: 1.5655, validation accuracy: 0.4388, validation loss: 1.5265
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.3787]
Epoch 4 | Training loss: 1.3787, validation accuracy: 0.5741, validation loss: 1.1734
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.21it/s, loss=1.2496]
Epoch 5 | Training loss: 1.2496, validation accuracy: 0.6061, validation loss: 1.1240
100%|██████████

In [55]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9398, 0.18006726288795472)
Valid: (0.8592, 0.5084918577194214)


In [56]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [57]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.870825, 0.4372392312049866)
Valid: (0.8403, 0.49424679012298584)


In [58]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [59]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.8686, 0.44026174283027647)
Valid: (0.8394, 0.49633121128082275)


In [60]:
# Set the first parameter group's learning rate to 1e-3 for the 30 additional epochs run in the next cell.
optimizer.param_groups[0]["lr"] = 1e-3

In [61]:
# Second training phase: 30 more epochs with the optimizer as currently configured.
epochs = 30

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.21it/s, loss=0.1113]
Epoch 1 | Training loss: 0.1113, validation accuracy: 0.8738, validation loss: 0.4672
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.20it/s, loss=0.0892]
Epoch 2 | Training loss: 0.0892, validation accuracy: 0.8735, validation loss: 0.4936
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.20it/s, loss=0.0860]
Epoch 3 | Training loss: 0.0860, validation accuracy: 0.8764, validation loss: 0.4866
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.18it/s, loss=0.0757]
Epoch 4 | Training loss: 0.0757, validation accuracy: 0.8763, validation loss: 0.4989
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.0749]
Epoch 5 | Training loss: 0.0749, validation accuracy: 0.8774, validation loss: 0.4930
100%|██████████

In [62]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.98535, 0.042094461554288864)
Valid: (0.8761, 0.580595885848999)


In [63]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [64]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.967075, 0.10842742094993592)
Valid: (0.8777, 0.3873641902923584)


In [65]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [66]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.970325, 0.09669469264149666)
Valid: (0.8786, 0.3929103065490723)


## Epoch average, span = 100

In [67]:
# Rewind the network to the saved initial weights before the next experiment.
model.load_state_dict(initial_state)
# Last expression of the cell: its return value is what the notebook displays below.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [68]:
# Optimizer and log file for the mode="epoch_avg" run with span = 100;
# avg_alpha is derived from the span as 2 / (span + 1).
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=2 / (100 + 1),
)
logger = Logger("vgg_log_augmentation_span100.txt.no_resizing")

In [69]:
# Seed torch's RNG (the same seed is used for every run in this notebook).
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.28it/s, loss=2.0947]
Epoch 1 | Training loss: 2.0947, validation accuracy: 0.2766, validation loss: 1.8416
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.7715]
Epoch 2 | Training loss: 1.7715, validation accuracy: 0.4127, validation loss: 1.5381
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.21it/s, loss=1.5534]
Epoch 3 | Training loss: 1.5534, validation accuracy: 0.4561, validation loss: 1.4414
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.22it/s, loss=1.3651]
Epoch 4 | Training loss: 1.3651, validation accuracy: 0.5594, validation loss: 1.2247
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.2241]
Epoch 5 | Training loss: 1.2241, validation accuracy: 0.5989, validation loss: 1.0845
100%|██████████

In [70]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.93215, 0.20444189097881318)
Valid: (0.8476, 0.5474414356231689)


In [71]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [72]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.97335, 0.08969548756480217)
Valid: (0.8815, 0.3939942355155945)


In [73]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [74]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.9734, 0.08802065689563751)
Valid: (0.8816, 0.393991511631012)


In [75]:
# Set the first parameter group's learning rate to 1e-3 for the 30 additional epochs run in the next cell.
optimizer.param_groups[0]["lr"] = 1e-3

In [76]:
# Second training phase: 30 more epochs with the optimizer as currently configured.
epochs = 30

for epoch in range(epochs):
    # One pass over the training loader, then notify the optimizer that the epoch ended.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.1113]
Epoch 1 | Training loss: 0.1113, validation accuracy: 0.8754, validation loss: 0.4813
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.0846]
Epoch 2 | Training loss: 0.0846, validation accuracy: 0.8785, validation loss: 0.5058
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.17it/s, loss=0.0803]
Epoch 3 | Training loss: 0.0803, validation accuracy: 0.8799, validation loss: 0.4945
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.21it/s, loss=0.0784]
Epoch 4 | Training loss: 0.0784, validation accuracy: 0.8797, validation loss: 0.5098
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.16it/s, loss=0.0735]
Epoch 5 | Training loss: 0.0735, validation accuracy: 0.8792, validation loss: 0.4986
100%|██████████

In [77]:
# Score the trained model on each loader, logging every result right after it is computed.
train_score = trainer.validation(model, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.986975, 0.0395649989373982)
Valid: (0.8805, 0.5659612306594849)


In [78]:
# Select the "RNA" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [79]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.987425, 0.038295551287382844)
Valid: (0.8824, 0.5524765687942504)


In [80]:
# Select the "RRE" method on the optimizer's first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Take a deep copy of the network; the copy's parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into model_acc — confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [81]:
# Place the copy on the trainer's device, then score it on each loader,
# logging every result right after it is computed.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
logger.log("Train:", train_score)
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Valid:", valid_score)

Train: (0.986825, 0.037698678241670135)
Valid: (0.8825, 0.5524682126998901)


## Epoch average, span = 50

In [82]:
# Load the weights saved in `initial_state` back into the model before this experiment.
model.load_state_dict(initial_state)
# Final expression of the cell, so its return value is what the notebook displays.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [83]:
# Fresh optimizer and log file for the "span = 50" configuration.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,  # second positional argument -- presumably the learning rate; confirm against AcceleratedSGD
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=2 / (50 + 1),  # 2 / (span + 1) with span = 50, as in the section title
)
logger = Logger("vgg_log_augmentation_span50.txt.no_resizing")

In [84]:
# Seed torch's RNG before the first training phase of this configuration.
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.28it/s, loss=2.0956]
Epoch 1 | Training loss: 2.0956, validation accuracy: 0.2659, validation loss: 1.8612
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.26it/s, loss=1.7643]
Epoch 2 | Training loss: 1.7643, validation accuracy: 0.3944, validation loss: 1.5618
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.5519]
Epoch 3 | Training loss: 1.5519, validation accuracy: 0.4192, validation loss: 1.5592
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.3819]
Epoch 4 | Training loss: 1.3819, validation accuracy: 0.5652, validation loss: 1.1922
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.22it/s, loss=1.2422]
Epoch 5 | Training loss: 1.2422, validation accuracy: 0.6101, validation loss: 1.0908
100%|██████████

In [85]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.947025, 0.16146137601137162)
Valid: (0.8613, 0.497474157333374)


In [86]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [87]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.971, 0.09410841317176818)
Valid: (0.8776, 0.41523076324462893)


In [88]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [89]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.970875, 0.09302963387966157)
Valid: (0.8776, 0.41520702381134034)


In [90]:
# Set the "lr" entry of the first parameter group to 1e-3 for the training that follows.
optimizer.param_groups[0]["lr"] = 1e-3

In [91]:
# Second training phase: 30 further epochs with the optimizer as currently
# configured; the logged epoch counter starts again at 1.
epochs = 30

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.1130]
Epoch 1 | Training loss: 0.1130, validation accuracy: 0.8715, validation loss: 0.4981
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.18it/s, loss=0.0906]
Epoch 2 | Training loss: 0.0906, validation accuracy: 0.8725, validation loss: 0.5187
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.18it/s, loss=0.0834]
Epoch 3 | Training loss: 0.0834, validation accuracy: 0.8729, validation loss: 0.5177
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.18it/s, loss=0.0798]
Epoch 4 | Training loss: 0.0798, validation accuracy: 0.8723, validation loss: 0.5291
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.17it/s, loss=0.0760]
Epoch 5 | Training loss: 0.0760, validation accuracy: 0.8731, validation loss: 0.5353
100%|██████████

In [92]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.98645, 0.039574689316749574)
Valid: (0.8746, 0.6010501583099366)


In [93]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [94]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.987125, 0.03959940095469355)
Valid: (0.8777, 0.5853159385681153)


In [95]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [96]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.986275, 0.03967122686877847)
Valid: (0.8777, 0.5853097663879394)


## Epoch average, span = 10

In [97]:
# Load the weights saved in `initial_state` back into the model before this experiment.
model.load_state_dict(initial_state)
# Final expression of the cell, so its return value is what the notebook displays.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [98]:
# Fresh optimizer and log file for the "span = 10" configuration.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,  # second positional argument -- presumably the learning rate; confirm against AcceleratedSGD
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=2 / (10 + 1),  # 2 / (span + 1) with span = 10, as in the section title
)
logger = Logger("vgg_log_augmentation_span10.txt.no_resizing")

In [99]:
# Seed torch's RNG before the first training phase of this configuration.
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.27it/s, loss=2.0803]
Epoch 1 | Training loss: 2.0803, validation accuracy: 0.2927, validation loss: 1.7931
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.7657]
Epoch 2 | Training loss: 1.7657, validation accuracy: 0.4020, validation loss: 1.5578
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.5593]
Epoch 3 | Training loss: 1.5593, validation accuracy: 0.4419, validation loss: 1.4899
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.3841]
Epoch 4 | Training loss: 1.3841, validation accuracy: 0.5712, validation loss: 1.1810
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.2283]
Epoch 5 | Training loss: 1.2283, validation accuracy: 0.6297, validation loss: 1.0486
100%|██████████

In [100]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.94465, 0.16465158625841142)
Valid: (0.8628, 0.5324086793899536)


In [101]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [102]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9703, 0.09419253569245338)
Valid: (0.88, 0.4036796995162964)


In [103]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [104]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9724, 0.09156930482387543)
Valid: (0.8799, 0.40365803604125977)


In [105]:
# Set the "lr" entry of the first parameter group to 1e-3 for the training that follows.
optimizer.param_groups[0]["lr"] = 1e-3

In [106]:
# Second training phase: 30 further epochs with the optimizer as currently
# configured; the logged epoch counter starts again at 1.
epochs = 30

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.22it/s, loss=0.1125]
Epoch 1 | Training loss: 0.1125, validation accuracy: 0.8750, validation loss: 0.4864
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.17it/s, loss=0.0903]
Epoch 2 | Training loss: 0.0903, validation accuracy: 0.8747, validation loss: 0.4994
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.0857]
Epoch 3 | Training loss: 0.0857, validation accuracy: 0.8755, validation loss: 0.4980
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.18it/s, loss=0.0793]
Epoch 4 | Training loss: 0.0793, validation accuracy: 0.8754, validation loss: 0.5178
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.20it/s, loss=0.0759]
Epoch 5 | Training loss: 0.0759, validation accuracy: 0.8765, validation loss: 0.5059
100%|██████████

In [107]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.98555, 0.041348774747550485)
Valid: (0.8778, 0.5650828149795533)


In [108]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [109]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.98755, 0.03792667396739125)
Valid: (0.8777, 0.5602526610374451)


In [110]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [111]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.986525, 0.03883714797794819)
Valid: (0.8777, 0.5602180318832397)


## Epoch average, alpha = 0.9

In [112]:
# Load the weights saved in `initial_state` back into the model before this experiment.
model.load_state_dict(initial_state)
# Final expression of the cell, so its return value is what the notebook displays.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [113]:
# Fresh optimizer and log file for the "alpha = 0.9" configuration.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,  # second positional argument -- presumably the learning rate; confirm against AcceleratedSGD
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=0.9,  # given directly here, as in the section title
)
logger = Logger("vgg_log_augmentation_alpha_09.txt.no_resizing")

In [114]:
# Seed torch's RNG before the first training phase of this configuration.
torch.manual_seed(2020)
epochs = 45

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.28it/s, loss=2.0887]
Epoch 1 | Training loss: 2.0887, validation accuracy: 0.2760, validation loss: 1.8180
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.25it/s, loss=1.7669]
Epoch 2 | Training loss: 1.7669, validation accuracy: 0.3875, validation loss: 1.5624
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.23it/s, loss=1.5616]
Epoch 3 | Training loss: 1.5616, validation accuracy: 0.4223, validation loss: 1.5375
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.3857]
Epoch 4 | Training loss: 1.3857, validation accuracy: 0.5804, validation loss: 1.1481
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.24it/s, loss=1.2403]
Epoch 5 | Training loss: 1.2403, validation accuracy: 0.6188, validation loss: 1.0620
100%|██████████

In [115]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.94145, 0.17383630282878876)
Valid: (0.8631, 0.4949284041404724)


In [116]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [117]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.96975, 0.0962745177090168)
Valid: (0.8832, 0.3893376468658447)


In [118]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [119]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9699, 0.0959766697049141)
Valid: (0.8832, 0.38928272047042844)


In [120]:
# Set the "lr" entry of the first parameter group to 1e-3 for the training that follows.
optimizer.param_groups[0]["lr"] = 1e-3

In [121]:
# Second training phase: 30 further epochs with the optimizer as currently
# configured; the logged epoch counter starts again at 1.
epochs = 30

for epoch in range(epochs):
    # Train for one epoch, notify the optimizer that the epoch is over,
    # then measure accuracy and loss on the validation split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.17it/s, loss=0.1121]
Epoch 1 | Training loss: 0.1121, validation accuracy: 0.8807, validation loss: 0.4551
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.16it/s, loss=0.0919]
Epoch 2 | Training loss: 0.0919, validation accuracy: 0.8803, validation loss: 0.4736
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.0857]
Epoch 3 | Training loss: 0.0857, validation accuracy: 0.8816, validation loss: 0.4709
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.16it/s, loss=0.0817]
Epoch 4 | Training loss: 0.0817, validation accuracy: 0.8817, validation loss: 0.4843
100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.19it/s, loss=0.0779]
Epoch 5 | Training loss: 0.0779, validation accuracy: 0.8836, validation loss: 0.4732
100%|██████████

In [122]:
# Score the current weights on both splits; trainer.validation yields an
# (accuracy, loss) pair, which is logged unchanged.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.98485, 0.04473927171379328)
Valid: (0.8842, 0.5525007114410401)


In [123]:
# Select the "RNA" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [124]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.986425, 0.04133399092853069)
Valid: (0.8849, 0.5423776842117309)


In [125]:
# Select the "RRE" method on the optimizer's first parameter group and record it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# The copy's parameters (not those of `model`) are what is handed to store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the accelerated weights and
# store_parameters() presumably writes them into model_acc -- confirm against AcceleratedSGD.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [126]:
# Put the model_acc copy on the trainer's device, then score it on both splits;
# each score is the (accuracy, loss) pair returned by trainer.validation.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.98605, 0.041012950982153414)
Valid: (0.8849, 0.5423800123214721)
