In [1]:
import sys
# Put the directory two levels above the working directory first on the import path
# (presumably where nn_extrapolation and nn_utils live — confirm).
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Check whether PyTorch can see a CUDA device; the boolean is shown as the cell output.
torch.cuda.is_available()

True

In [4]:
# Training uses the mean NLL per batch; validation sums the NLL over samples
# (NOTE(review): presumably so Trainer can divide by the dataset size — confirm in nn_utils).
trainer = Trainer(
    # Use the first GPU when present, otherwise fall back to the CPU instead of failing
    # on machines without CUDA.
    device="cuda:0" if torch.cuda.is_available() else "cpu",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [5]:
# Load CIFAR10 into a per-user cache directory under /tmp and split off 20% for validation.
# `dl` is indexed with "train" and "valid" by the cells below.
dl = load_dataset(
    dataset="CIFAR10",
    # os.environ["USER"] raises KeyError when USER is unset (e.g. some containers);
    # fall back to a fixed directory name in that case.
    root=os.path.join("/tmp", os.environ.get("USER", "unknown"), "CIFAR"),
    # Random affine augmentation (NOTE(review): `transforms` arrives via `from nn_utils import *`,
    # presumably torchvision.transforms — confirm).
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# ResNet-18 without pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of `weights=None`;
# switch once the minimum supported torchvision version allows it.
model = models.resnet18(pretrained=False)
# Replace the classification head with a 10-output layer emitting log-probabilities,
# which is what nn.NLLLoss expects. The input width is read from the existing head
# instead of hard-coding 512, so the cell keeps working if the backbone is swapped.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 10),
    nn.LogSoftmax(-1)
)
# Snapshot the initial weights before the model is moved to the training device,
# so later experiments can restart from the same initialisation.
initial_state = deepcopy(model.state_dict())
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# Pin the initial weights across notebook runs: the first run writes them to disk,
# every later run reloads that file so all experiments share one initialisation.
state_path = "resnet_initial_state.pth"
if not os.path.exists(state_path):
    # No cached initialisation yet: persist the snapshot taken when the model was built.
    torch.save(initial_state, state_path)
else:
    # Reuse the cached initialisation and overwrite the freshly built weights with it.
    initial_state = torch.load(state_path)
    model.load_state_dict(initial_state)
    model.to(trainer.device)

In [8]:
# Baseline: validate the untrained model; the returned pair (unpacked elsewhere as
# accuracy, loss) is shown as the cell output.
trainer.validation(model, dl["valid"])

(0.1024, 2.3916895107269287)

## No momentum

In [10]:
# Optimizer and log file for the run without momentum or weight decay.
# NOTE(review): the positional 1e-1 is presumably the learning rate (later cells
# adjust param_groups[0]["lr"]) — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=10,
    momentum=0,
    weight_decay=0,
    lambda_=1e-8,
)
logger = Logger("resnet_log_augmentation_no_momentum.txt.no_resizing")
# Seed torch's global RNG before training. The trailing ";" hides the Generator repr,
# whose memory address differs on every run and only adds noise to the saved notebook.
torch.manual_seed(2020);

<torch._C.Generator at 0x7f0654071110>

In [11]:
# First phase: 45 epochs with the optimizer exactly as constructed, validating after each epoch.
epochs = 45

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epochs are reported 1-based.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 55.69it/s, loss=1.9005]
Epoch 1 | Training loss: 1.9005, validation accuracy: 0.3473, validation loss: 2.1367
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 55.81it/s, loss=1.5258]
Epoch 2 | Training loss: 1.5258, validation accuracy: 0.3476, validation loss: 1.9506
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.82it/s, loss=1.3823]
Epoch 3 | Training loss: 1.3823, validation accuracy: 0.4380, validation loss: 1.9812
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 55.15it/s, loss=1.2835]
Epoch 4 | Training loss: 1.2835, validation accuracy: 0.5254, validation loss: 1.3274
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 55.11it/s, loss=1.1959]
Epoch 5 | Training loss: 1.1959, validation accuracy: 0.5035, validation loss: 1.5389
100%|██████████

In [12]:
# Score the current `model` on both splits (train first, then validation) and log each result.
# NOTE(review): the train loader was built with random augmentation, so the train score
# is presumably not exactly repeatable — confirm.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.841075, 0.4407322925567627)
Valid: (0.7818, 0.7197161993026734)


In [13]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [14]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8447, 0.43828747572898863)
Valid: (0.7999, 0.620662706565857)


In [15]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [16]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.84675, 0.4382346224784851)
Valid: (0.7999, 0.6206391641616821)


In [17]:
# Lower the learning rate of the first parameter group to 1e-2 for the next training phase.
optimizer.param_groups[0]["lr"] = 1e-2

In [18]:
# Second phase: 20 more epochs after the learning rate was lowered to 1e-2.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.34it/s, loss=0.3787]
Epoch 1 | Training loss: 0.3787, validation accuracy: 0.8047, validation loss: 0.6223
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.73it/s, loss=0.3520]
Epoch 2 | Training loss: 0.3520, validation accuracy: 0.8095, validation loss: 0.6253
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.73it/s, loss=0.3389]
Epoch 3 | Training loss: 0.3389, validation accuracy: 0.8090, validation loss: 0.6298
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.16it/s, loss=0.3321]
Epoch 4 | Training loss: 0.3321, validation accuracy: 0.8101, validation loss: 0.6292
100%|██████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.32it/s, loss=0.3288]
Epoch 5 | Training loss: 0.3288, validation accuracy: 0.8132, validation loss: 0.6317
100%|██████████

In [19]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.91115, 0.25244254360198976)
Valid: (0.8081, 0.6798822336196899)


In [20]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [21]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9034, 0.26764912447929384)
Valid: (0.8078, 0.6828980052947998)


In [22]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [23]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.901525, 0.2732176531076431)
Valid: (0.8078, 0.6828953104019165)


In [24]:
# Lower the learning rate of the first parameter group to 1e-3 for the final training phase.
optimizer.param_groups[0]["lr"] = 1e-3

In [25]:
# Third phase: 20 more epochs after the learning rate was lowered to 1e-3.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.92it/s, loss=0.2765]
Epoch 1 | Training loss: 0.2765, validation accuracy: 0.8088, validation loss: 0.6718
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.82it/s, loss=0.2707]
Epoch 2 | Training loss: 0.2707, validation accuracy: 0.8106, validation loss: 0.6690
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.08it/s, loss=0.2686]
Epoch 3 | Training loss: 0.2686, validation accuracy: 0.8076, validation loss: 0.6730
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [26]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.915525, 0.2366906145811081)
Valid: (0.8111, 0.6787169399261475)


In [27]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [28]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.915125, 0.23779242656230926)
Valid: (0.8104, 0.6793488918304443)


In [29]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [30]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.91425, 0.2407463965654373)
Valid: (0.8104, 0.6793475513458251)


## Momentum

### Lambda = 1e-16

In [31]:
# Rewind the network to the saved initial weights so this run starts from the same
# initialisation as the previous experiment.
model.load_state_dict(initial_state)
# The trailing ";" suppresses the full ResNet repr that `.to()` would otherwise print again.
model.to(trainer.device);

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [32]:
# Sanity check after the reset: validate the re-initialised model; the returned pair
# (unpacked elsewhere as accuracy, loss) is shown as the cell output.
trainer.validation(model, dl["valid"])

(0.1024, 2.3916895107269287)

In [33]:
# Optimizer and log file for the momentum run with lambda_=1e-16.
# NOTE(review): the positional 1e-1 is presumably the learning rate (later cells
# adjust param_groups[0]["lr"]) — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-16,
)
logger = Logger("resnet_log_augmentation-lambda=1e-16.txt.no_resizing")
# Seed torch's global RNG before training. The trailing ";" hides the Generator repr,
# whose memory address differs on every run and only adds noise to the saved notebook.
torch.manual_seed(2020);

<torch._C.Generator at 0x7f0654071110>

In [34]:
# First phase: 45 epochs with the optimizer exactly as constructed, validating after each epoch.
epochs = 45

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epochs are reported 1-based.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.88it/s, loss=2.2130]
Epoch 1 | Training loss: 2.2130, validation accuracy: 0.3482, validation loss: 1.8887
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.29it/s, loss=1.6590]
Epoch 2 | Training loss: 1.6590, validation accuracy: 0.4302, validation loss: 1.5467
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.86it/s, loss=1.5189]
Epoch 3 | Training loss: 1.5189, validation accuracy: 0.4832, validation loss: 1.4076
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [35]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.845825, 0.436823532295227)
Valid: (0.7918, 0.6765367492675781)


In [36]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [37]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.84085, 0.4385167631149292)
Valid: (0.7971, 0.6236986894607544)


In [38]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [39]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.839775, 0.4405800078868866)
Valid: (0.7971, 0.6236781562805176)


In [40]:
# Lower the learning rate of the first parameter group to 1e-2 for the next training phase.
optimizer.param_groups[0]["lr"] = 1e-2

In [41]:
# Second phase: 20 more epochs after the learning rate was lowered to 1e-2.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.77it/s, loss=0.3618]
Epoch 1 | Training loss: 0.3618, validation accuracy: 0.8211, validation loss: 0.6006
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.55it/s, loss=0.3285]
Epoch 2 | Training loss: 0.3285, validation accuracy: 0.8204, validation loss: 0.5971
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 52.01it/s, loss=0.3190]
Epoch 3 | Training loss: 0.3190, validation accuracy: 0.8209, validation loss: 0.6138
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [42]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.919425, 0.22524109711647033)
Valid: (0.8178, 0.6600747977256775)


In [43]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [44]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9185, 0.2258700907945633)
Valid: (0.8187, 0.6608206221580505)


In [45]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [46]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.916575, 0.230946377491951)
Valid: (0.8187, 0.66081487159729)


In [47]:
# Lower the learning rate of the first parameter group to 1e-3 for the final training phase.
optimizer.param_groups[0]["lr"] = 1e-3

In [48]:
# Third phase: 20 more epochs after the learning rate was lowered to 1e-3.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.53it/s, loss=0.2417]
Epoch 1 | Training loss: 0.2417, validation accuracy: 0.8227, validation loss: 0.6364
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.53it/s, loss=0.2388]
Epoch 2 | Training loss: 0.2388, validation accuracy: 0.8212, validation loss: 0.6331
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.04it/s, loss=0.2376]
Epoch 3 | Training loss: 0.2376, validation accuracy: 0.8205, validation loss: 0.6503
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [49]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.926475, 0.20447875607013702)
Valid: (0.8243, 0.6578270555496216)


In [50]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [51]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.925775, 0.2063621623277664)
Valid: (0.8244, 0.6589616132736206)


In [52]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [53]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.92445, 0.21053753056526184)
Valid: (0.8244, 0.6589605768203736)


### Lambda = 1e-8

In [54]:
# Rewind the network to the saved initial weights so this run starts from the same
# initialisation as the previous experiments.
model.load_state_dict(initial_state)
# The trailing ";" suppresses the full ResNet repr that `.to()` would otherwise print again.
model.to(trainer.device);

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [55]:
# Sanity check after the reset: validate the re-initialised model; the returned pair
# (unpacked elsewhere as accuracy, loss) is shown as the cell output.
trainer.validation(model, dl["valid"])

(0.1024, 2.3916895107269287)

In [56]:
# Optimizer and log file for the momentum run with lambda_=1e-8.
# NOTE(review): the positional 1e-1 is presumably the learning rate (later cells
# adjust param_groups[0]["lr"]) — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
# Unlike the other momentum runs, this log file name does not carry the lambda value.
logger = Logger("resnet_log_augmentation.txt.no_resizing")
# Seed torch's global RNG before training. The trailing ";" hides the Generator repr,
# whose memory address differs on every run and only adds noise to the saved notebook.
torch.manual_seed(2020);

<torch._C.Generator at 0x7f0654071110>

In [57]:
# First phase: 45 epochs with the optimizer exactly as constructed, validating after each epoch.
epochs = 45

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epochs are reported 1-based.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.04it/s, loss=2.2067]
Epoch 1 | Training loss: 2.2067, validation accuracy: 0.3481, validation loss: 1.8675
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 52.00it/s, loss=1.6652]
Epoch 2 | Training loss: 1.6652, validation accuracy: 0.4037, validation loss: 1.5892
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.55it/s, loss=1.5062]
Epoch 3 | Training loss: 1.5062, validation accuracy: 0.4608, validation loss: 1.4587
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [58]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.857775, 0.4060311490058899)
Valid: (0.7967, 0.6371520111083985)


In [59]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [60]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.856825, 0.4043162042379379)
Valid: (0.8034, 0.6193351472854615)


In [61]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [62]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.85975, 0.4040320307254791)
Valid: (0.8034, 0.6193838153839112)


In [63]:
# Lower the learning rate of the first parameter group to 1e-2 for the next training phase.
optimizer.param_groups[0]["lr"] = 1e-2

In [64]:
# Second phase: 20 more epochs after the learning rate was lowered to 1e-2.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.95it/s, loss=0.3581]
Epoch 1 | Training loss: 0.3581, validation accuracy: 0.8186, validation loss: 0.5834
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.30it/s, loss=0.3269]
Epoch 2 | Training loss: 0.3269, validation accuracy: 0.8209, validation loss: 0.5844
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.99it/s, loss=0.3152]
Epoch 3 | Training loss: 0.3152, validation accuracy: 0.8217, validation loss: 0.5968
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [65]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.920725, 0.2241452866077423)
Valid: (0.8227, 0.6296474899291992)


In [66]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [67]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.915925, 0.2331008595943451)
Valid: (0.8195, 0.6519404945373535)


In [68]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [69]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.915675, 0.2386408855199814)
Valid: (0.8195, 0.6519443071365356)


In [70]:
# Lower the learning rate of the first parameter group to 1e-3 for the final training phase.
optimizer.param_groups[0]["lr"] = 1e-3

In [71]:
# Third phase: 20 more epochs after the learning rate was lowered to 1e-3.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epoch numbers restart at 1 in every phase.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.58it/s, loss=0.2453]
Epoch 1 | Training loss: 0.2453, validation accuracy: 0.8224, validation loss: 0.6194
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.62it/s, loss=0.2362]
Epoch 2 | Training loss: 0.2362, validation accuracy: 0.8228, validation loss: 0.6214
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.40it/s, loss=0.2356]
Epoch 3 | Training loss: 0.2356, validation accuracy: 0.8222, validation loss: 0.6391
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [72]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.926625, 0.20792481157779694)
Valid: (0.8227, 0.6430280235290528)


In [73]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [74]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.926375, 0.2061994443178177)
Valid: (0.8215, 0.6464300540924073)


In [75]:
# Build an extrapolated model with the "RRE" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RRE"  # method is read from the first parameter group
logger.log("RRE")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [76]:
# Score the extrapolated copy `model_acc` on both splits (train first, then validation).
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9254, 0.20871990084648132)
Valid: (0.8215, 0.6464307264328003)


### Lambda = 1e-4

In [77]:
# Rewind the network to the saved initial weights so this run starts from the same
# initialisation as the previous experiments.
model.load_state_dict(initial_state)
# The trailing ";" suppresses the full ResNet repr that `.to()` would otherwise print again.
model.to(trainer.device);

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [78]:
# Sanity check after the reset: validate the re-initialised model; the returned pair
# (unpacked elsewhere as accuracy, loss) is shown as the cell output.
trainer.validation(model, dl["valid"])

(0.1024, 2.3916895107269287)

In [79]:
# Optimizer and log file for the momentum run with lambda_=1e-4.
# NOTE(review): the positional 1e-1 is presumably the learning rate (later cells
# adjust param_groups[0]["lr"]) — confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-4,
)
logger = Logger("resnet_log_augmentation-lambda=1e-4.txt.no_resizing")
# Seed torch's global RNG before training. The trailing ";" hides the Generator repr,
# whose memory address differs on every run and only adds noise to the saved notebook.
torch.manual_seed(2020);

<torch._C.Generator at 0x7f0654071110>

In [80]:
# First phase: 45 epochs with the optimizer exactly as constructed, validating after each epoch.
epochs = 45

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # Signal the end of the epoch to the optimizer before validating.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    epoch_report = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    # Epochs are reported 1-based.
    logger.log("Epoch", epoch + 1, "|", epoch_report)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.46it/s, loss=2.2129]
Epoch 1 | Training loss: 2.2129, validation accuracy: 0.3309, validation loss: 1.7766
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.10it/s, loss=1.6514]
Epoch 2 | Training loss: 1.6514, validation accuracy: 0.4517, validation loss: 1.4797
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.76it/s, loss=1.5141]
Epoch 3 | Training loss: 1.5141, validation accuracy: 0.5028, validation loss: 1.3573
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [81]:
# Score the current `model` on both splits (train first, then validation) and log each result.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8513, 0.4179581168651581)
Valid: (0.7949, 0.6471888906478882)


In [82]:
# Build an extrapolated model with the "RNA" method and keep it in a separate copy.
optimizer.param_groups[0]["method"] = "RNA"  # method is read from the first parameter group
logger.log("RNA")
model_acc = deepcopy(model)  # the stored parameters go into this copy rather than `model`
# NOTE(review): accelerate() presumably computes the extrapolated parameters and
# store_parameters() presumably writes them into the given parameter iterables —
# confirm in nn_extrapolation. The call order matters.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [83]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.861475, 0.38991186656951904)
Valid: (0.8023, 0.6106670978546143)


In [84]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [85]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.864325, 0.3861743935585022)
Valid: (0.8025, 0.610670313167572)


In [86]:
# Set the learning rate of the first parameter group to 1e-2 for the next
# training stage.
optimizer.param_groups[0]["lr"] = 1e-2

In [87]:
# Second training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 54.08it/s, loss=0.3663]
Epoch 1 | Training loss: 0.3663, validation accuracy: 0.8119, validation loss: 0.6035
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.35it/s, loss=0.3282]
Epoch 2 | Training loss: 0.3282, validation accuracy: 0.8136, validation loss: 0.6090
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 53.67it/s, loss=0.3156]
Epoch 3 | Training loss: 0.3156, validation accuracy: 0.8191, validation loss: 0.6138
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [88]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9208, 0.22460987005233765)
Valid: (0.8161, 0.6668506631851197)


In [89]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [90]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.917475, 0.23080288746356964)
Valid: (0.812, 0.6697820486068725)


In [91]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [92]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.914825, 0.2378620442390442)
Valid: (0.8119, 0.6697914031982422)


In [93]:
# Set the learning rate of the first parameter group to 1e-3 for the final
# training stage of this experiment.
optimizer.param_groups[0]["lr"] = 1e-3

In [94]:
# Third training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:05<00:00, 52.75it/s, loss=0.2390]
Epoch 1 | Training loss: 0.2390, validation accuracy: 0.8186, validation loss: 0.6527
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.64it/s, loss=0.2391]
Epoch 2 | Training loss: 0.2391, validation accuracy: 0.8184, validation loss: 0.6510
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 51.56it/s, loss=0.2340]
Epoch 3 | Training loss: 0.2340, validation accuracy: 0.8170, validation loss: 0.6671
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [95]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.92775, 0.20241926593780518)
Valid: (0.8183, 0.6668149614334107)


In [96]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [97]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.925525, 0.2059572431564331)
Valid: (0.8172, 0.669610499382019)


In [98]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [99]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.926675, 0.20393958277702331)
Valid: (0.8172, 0.6696112274169922)


## Exponential moving average, span = 10

In [102]:
# Start this experiment from the weights held in `initial_state`, discarding
# whatever the previous experiment trained.
model.load_state_dict(initial_state)
# .to() is the cell's last expression, so the module's repr is displayed.
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [103]:
# Baseline check after the reset: evaluate `model` on the validation split.
# As the cell's last expression the result is displayed; the training loops
# unpack this same call as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1024, 2.3916895107269287)

In [104]:
# Experiment set-up (mode="epoch_avg"): a new AcceleratedSGD over the model's
# parameters, a dedicated log file, and a fixed torch seed.
# NOTE(review): the second positional argument is 1e-2 here but 1e-1 in the
# other two experiment set-ups in this notebook — confirm this is intended.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
    mode="epoch_avg",
    avg_alpha=2 / (10 + 1),  # 2 / (span + 1) with span = 10, per the section heading
)
logger = Logger("resnet_log_augmentation_span=10.txt.no_resizing")
# Kept as the final expression so the cell still displays the returned Generator.
torch.manual_seed(2020)

<torch._C.Generator at 0x7f0654071110>

In [105]:
# First training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 45

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.10it/s, loss=1.8369]
Epoch 1 | Training loss: 1.8369, validation accuracy: 0.4214, validation loss: 1.5839
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.41it/s, loss=1.5349]
Epoch 2 | Training loss: 1.5349, validation accuracy: 0.4904, validation loss: 1.4088
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 50.75it/s, loss=1.4009]
Epoch 3 | Training loss: 1.4009, validation accuracy: 0.5555, validation loss: 1.2691
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [106]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.844175, 0.4342851956367493)
Valid: (0.7906, 0.681303299331665)


In [107]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [108]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.851525, 0.41859201264381407)
Valid: (0.7958, 0.620941765499115)


In [109]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [110]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.851625, 0.41888185758590696)
Valid: (0.7958, 0.6209063492774963)


In [111]:
# Set the learning rate of the first parameter group to 1e-2 for the next
# training stage.
# NOTE(review): this experiment's optimizer was constructed with 1e-2 as its
# second positional argument; if that argument is the learning rate, this
# assignment changes nothing, whereas the other experiments step down from
# 1e-1 at this point — confirm which schedule was intended.
optimizer.param_groups[0]["lr"] = 1e-2

In [112]:
# Second training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 49.27it/s, loss=0.4466]
Epoch 1 | Training loss: 0.4466, validation accuracy: 0.7960, validation loss: 0.6625
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 47.39it/s, loss=0.4417]
Epoch 2 | Training loss: 0.4417, validation accuracy: 0.7864, validation loss: 0.6928
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 47.75it/s, loss=0.4369]
Epoch 3 | Training loss: 0.4369, validation accuracy: 0.7907, validation loss: 0.6741
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [113]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8917, 0.30008010478019714)
Valid: (0.7908, 0.744043511390686)


In [114]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [115]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.90705, 0.26277397770881655)
Valid: (0.8132, 0.6498286313056946)


In [116]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [117]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9081, 0.2623101516246796)
Valid: (0.8133, 0.6498181837081909)


In [118]:
# Set the learning rate of the first parameter group to 1e-3 for the final
# training stage of this experiment.
optimizer.param_groups[0]["lr"] = 1e-3

In [119]:
# Third training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 20

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 49.64it/s, loss=0.2590]
Epoch 1 | Training loss: 0.2590, validation accuracy: 0.8139, validation loss: 0.6633
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 49.36it/s, loss=0.2349]
Epoch 2 | Training loss: 0.2349, validation accuracy: 0.8172, validation loss: 0.6681
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:06<00:00, 49.25it/s, loss=0.2233]
Epoch 3 | Training loss: 0.2233, validation accuracy: 0.8155, validation loss: 0.6839
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/

In [120]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.94865, 0.14897416538000108)
Valid: (0.8188, 0.7443863039016724)


In [121]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [122]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.941725, 0.1626715887069702)
Valid: (0.8156, 0.7739826675415039)


In [123]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [124]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.94445, 0.15734280723333358)
Valid: (0.8156, 0.7739893617630005)


## Resizing (inputs loaded with target_size 224×224)

In [98]:
# Re-create the Trainer for the resizing experiment with the same arguments as
# the notebook's first Trainer: mean-reduced NLL as `loss_fn`, sum-reduced NLL
# as `val_loss_fn`.
# NOTE(review): the summed validation loss is presumably normalised inside
# Trainer.validation (logged values are per-sample in scale) — confirm in nn_utils.
trainer = Trainer(
    device="cuda:0",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [99]:
# Rebuild the CIFAR10 loaders for the resizing experiment: same augmentation,
# validation split, batch size and worker count as the earlier load_dataset
# call, plus target_size=(224, 224).
# NOTE(review): target_size presumably makes load_dataset resize every image —
# confirm in nn_utils.
import getpass  # cell-local: only needed for the user-name fallback below

dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join(
        "/tmp",
        # os.environ["USER"] raises KeyError where USER is unset (e.g. minimal
        # containers). Keep its value whenever it exists so the cache path is
        # unchanged, and only fall back to getpass otherwise.
        os.environ["USER"] if "USER" in os.environ else getpass.getuser(),
        "CIFAR",
    ),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
    target_size=(224, 224),
)



Files already downloaded and verified
Files already downloaded and verified


In [100]:
# Start this experiment from the weights held in `initial_state`, discarding
# whatever the previous experiment trained.
model.load_state_dict(initial_state)
# .to() is the cell's last expression, so the module's repr is displayed.
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [101]:
# Baseline check after the reset: evaluate `model` on the validation split of
# the reloaded dataset. As the cell's last expression the result is displayed;
# the training loops unpack this same call as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1113, 2.5140885375976563)

In [102]:
# Experiment set-up (resized inputs, lambda_=1e-8): a new AcceleratedSGD over
# the model's parameters, a dedicated log file, and a fixed torch seed.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
logger = Logger("resnet_log_augmentation.txt")
# Kept as the final expression so the cell still displays the returned Generator.
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [103]:
# First training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=2.2682]
Epoch 1 | Training loss: 2.2682, validation accuracy: 0.2751, validation loss: 1.9231
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=1.7987]
Epoch 2 | Training loss: 1.7987, validation accuracy: 0.3842, validation loss: 1.6462
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=1.5382]
Epoch 3 | Training loss: 1.5382, validation accuracy: 0.4664, validation loss: 1.4636
100%|██████████| 313/313 [02:05<00:00,  2.50it/s, loss=1.3334]
Epoch 4 | Training loss: 1.3334, validation accuracy: 0.5451, validation loss: 1.3767
100%|██████████| 313/313 [02:03<00:00,  2.52it/s, loss=1.1522]
Epoch 5 | Training loss: 1.1522, validation accuracy: 0.6002, validation loss: 1.1925
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.9976]
Epoch 6 | Training loss: 0.9976, validation accuracy: 0.6539, validation loss: 1.0294
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.8722]
Epoch 7 | Training loss: 0.8722, validation

In [104]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.924875, 0.21868382456302643)
Valid: (0.8638, 0.5077923046112061)


In [105]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [106]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.91715, 0.23692127623558046)
Valid: (0.8688, 0.4422642339706421)


In [107]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [108]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.917825, 0.23501521935462952)
Valid: (0.8688, 0.4422644820213318)


In [109]:
# Set the learning rate of the first parameter group to 1e-2 for the next
# training stage.
optimizer.param_groups[0]["lr"] = 1e-2

In [110]:
# Second training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.1235]
Epoch 1 | Training loss: 0.1235, validation accuracy: 0.8965, validation loss: 0.3596
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0994]
Epoch 2 | Training loss: 0.0994, validation accuracy: 0.8943, validation loss: 0.3705
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0871]
Epoch 3 | Training loss: 0.0871, validation accuracy: 0.8966, validation loss: 0.3676
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0798]
Epoch 4 | Training loss: 0.0798, validation accuracy: 0.8989, validation loss: 0.3720
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0736]
Epoch 5 | Training loss: 0.0736, validation accuracy: 0.8989, validation loss: 0.3732
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0701]
Epoch 6 | Training loss: 0.0701, validation accuracy: 0.8972, validation loss: 0.3815
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0657]
Epoch 7 | Training loss: 0.0657, validation

In [111]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.992175, 0.026007969420403242)
Valid: (0.898, 0.4474107147216797)


In [112]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [113]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.991075, 0.029470107208192348)
Valid: (0.8976, 0.45620492362976073)


In [114]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` free to keep
# training — confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [115]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.99135, 0.030331627766788005)
Valid: (0.8976, 0.45620513763427734)


In [116]:
# Set the learning rate of the first parameter group to 1e-3 for the final
# training stage of this experiment.
optimizer.param_groups[0]["lr"] = 1e-3

In [117]:
# Third training stage: per epoch, train once over the training split, signal
# the end of the epoch to the optimizer, validate, and log a one-line summary.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,  # logged 1-based
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0314]
Epoch 1 | Training loss: 0.0314, validation accuracy: 0.9000, validation loss: 0.4431
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0310]
Epoch 2 | Training loss: 0.0310, validation accuracy: 0.9005, validation loss: 0.4461
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0283]
Epoch 3 | Training loss: 0.0283, validation accuracy: 0.8995, validation loss: 0.4445
100%|██████████| 313/313 [02:06<00:00,  2.48it/s, loss=0.0276]
Epoch 4 | Training loss: 0.0276, validation accuracy: 0.8994, validation loss: 0.4422
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0284]
Epoch 5 | Training loss: 0.0284, validation accuracy: 0.8994, validation loss: 0.4459
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0282]
Epoch 6 | Training loss: 0.0282, validation accuracy: 0.8997, validation loss: 0.4456
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0272]
Epoch 7 | Training loss: 0.0272, validation

In [118]:
# End-of-stage snapshot: evaluate the trained model on the train split, then
# on the validation split, and write both results to the log.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994825, 0.01932948977351189)
Valid: (0.9003, 0.44885823802948)


In [119]:
# Select the "RNA" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [120]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994925, 0.01953197303712368)
Valid: (0.8996, 0.44897826642990113)


In [121]:
# Select the "RRE" method on the optimizer's first parameter group and tag the
# log so the scores that follow can be attributed to it.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the copy, leaving `model` untouched —
# confirm against nn_extrapolation.AcceleratedSGD.
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [122]:
# Score the accelerated copy: move it to the trainer's device, evaluate on the
# train split and then the validation split, and log both results.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994425, 0.020918183160573246)
Valid: (0.8996, 0.44897833452224734)


In [123]:
# NOTE(review): bare `exit` — under IPython this normally ends the kernel
# session (presumably to release the GPU once the run is finished). Confirm it
# is intentional; no cell placed after this one would run in a full re-run.
exit