In [1]:
import sys
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
torch.cuda.is_available()

True

In [4]:
trainer = Trainer(
    device="cuda:0",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [5]:
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", os.environ["USER"], "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
model = models.resnet18(pretrained=False)
model.fc = nn.Sequential(
    nn.Linear(512, 10),
    nn.LogSoftmax(-1)
)
initial_state = deepcopy(model.state_dict())
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
trainer.validation(model, dl["valid"])

(0.0973, 2.3487395290374757)

## No momentum

In [8]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0, weight_decay=0, lambda_=1e-8)
logger = Logger("resnet_log_augmentation_no_momentum.txt.no_resizing")
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [9]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 20.60it/s, loss=1.9113]
Epoch 1 | Training loss: 1.9113, validation accuracy: 0.3912, validation loss: 1.7546
100%|██████████| 313/313 [00:15<00:00, 20.65it/s, loss=1.5291]
Epoch 2 | Training loss: 1.5291, validation accuracy: 0.4431, validation loss: 1.5080
100%|██████████| 313/313 [00:15<00:00, 20.47it/s, loss=1.3856]
Epoch 3 | Training loss: 1.3856, validation accuracy: 0.5028, validation loss: 1.3529
100%|██████████| 313/313 [00:16<00:00, 19.13it/s, loss=1.2800]
Epoch 4 | Training loss: 1.2800, validation accuracy: 0.5057, validation loss: 1.4907
100%|██████████| 313/313 [00:16<00:00, 18.85it/s, loss=1.1990]
Epoch 5 | Training loss: 1.1990, validation accuracy: 0.5586, validation loss: 1.3170
100%|██████████| 313/313 [00:16<00:00, 18.83it/s, loss=1.1340]
Epoch 6 | Training loss: 1.1340, validation accuracy: 0.5699, validation loss: 1.2577
100%|██████████| 313/313 [00:16<00:00, 18.89it/s, loss=1.0785]
Epoch 7 | Training loss: 1.0785, validation

In [10]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.765025, 0.6681581225395202)
Valid: (0.753, 0.7279932846069336)


In [11]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [12]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.75865, 0.6773087636947632)
Valid: (0.7365, 0.7447814754486084)


In [13]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [14]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.759925, 0.6742146611213684)
Valid: (0.7365, 0.7447812831878662)


In [15]:
optimizer.param_groups[0]["lr"] = 1e-2

In [16]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.38it/s, loss=0.5559]
Epoch 1 | Training loss: 0.5559, validation accuracy: 0.7867, validation loss: 0.6287
100%|██████████| 313/313 [00:16<00:00, 18.76it/s, loss=0.5310]
Epoch 2 | Training loss: 0.5310, validation accuracy: 0.7863, validation loss: 0.6352
100%|██████████| 313/313 [00:16<00:00, 18.74it/s, loss=0.5191]
Epoch 3 | Training loss: 0.5191, validation accuracy: 0.7887, validation loss: 0.6261
100%|██████████| 313/313 [00:16<00:00, 18.61it/s, loss=0.5154]
Epoch 4 | Training loss: 0.5154, validation accuracy: 0.7902, validation loss: 0.6352
100%|██████████| 313/313 [00:16<00:00, 18.64it/s, loss=0.5062]
Epoch 5 | Training loss: 0.5062, validation accuracy: 0.7919, validation loss: 0.6290
100%|██████████| 313/313 [00:16<00:00, 18.72it/s, loss=0.5016]
Epoch 6 | Training loss: 0.5016, validation accuracy: 0.7935, validation loss: 0.6371
100%|██████████| 313/313 [00:16<00:00, 18.74it/s, loss=0.4989]
Epoch 7 | Training loss: 0.4989, validation

In [17]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.85675, 0.40390582523345947)
Valid: (0.7965, 0.637440034866333)


In [18]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [19]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8502, 0.4224253629207611)
Valid: (0.7924, 0.6581800193786621)


In [20]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [21]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.851275, 0.4192594344139099)
Valid: (0.7924, 0.6581800266265869)


In [22]:
optimizer.param_groups[0]["lr"] = 1e-3

In [23]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.44it/s, loss=0.4261]
Epoch 1 | Training loss: 0.4261, validation accuracy: 0.7991, validation loss: 0.6262
100%|██████████| 313/313 [00:16<00:00, 18.74it/s, loss=0.4224]
Epoch 2 | Training loss: 0.4224, validation accuracy: 0.7988, validation loss: 0.6338
100%|██████████| 313/313 [00:16<00:00, 18.80it/s, loss=0.4205]
Epoch 3 | Training loss: 0.4205, validation accuracy: 0.7996, validation loss: 0.6265
100%|██████████| 313/313 [00:16<00:00, 18.83it/s, loss=0.4170]
Epoch 4 | Training loss: 0.4170, validation accuracy: 0.8006, validation loss: 0.6359
100%|██████████| 313/313 [00:16<00:00, 18.70it/s, loss=0.4143]
Epoch 5 | Training loss: 0.4143, validation accuracy: 0.7994, validation loss: 0.6375
100%|██████████| 313/313 [00:16<00:00, 18.64it/s, loss=0.4170]
Epoch 6 | Training loss: 0.4170, validation accuracy: 0.7993, validation loss: 0.6370
100%|██████████| 313/313 [00:16<00:00, 18.66it/s, loss=0.4236]
Epoch 7 | Training loss: 0.4236, validation

In [24]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86385, 0.3849111888408661)
Valid: (0.7988, 0.6374698726654052)


In [25]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [26]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.862175, 0.39002884130477905)
Valid: (0.7995, 0.6393859085083008)


In [27]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [28]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8587, 0.39092918934822085)
Valid: (0.7995, 0.639385498046875)


## Momentum

### Lambda = 1e-16

In [29]:
model.load_state_dict(initial_state)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [30]:
trainer.validation(model, dl["valid"])

(0.0973, 2.3487395290374757)

In [31]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-16)
logger = Logger("resnet_log_augmentation-lambda=1e-16.txt.no_resizing")
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [32]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.04it/s, loss=2.3219]
Epoch 1 | Training loss: 2.3219, validation accuracy: 0.3290, validation loss: 1.9954
100%|██████████| 313/313 [00:17<00:00, 18.24it/s, loss=1.7005]
Epoch 2 | Training loss: 1.7005, validation accuracy: 0.4002, validation loss: 1.7723
100%|██████████| 313/313 [00:17<00:00, 18.26it/s, loss=1.5392]
Epoch 3 | Training loss: 1.5392, validation accuracy: 0.4673, validation loss: 1.4557
100%|██████████| 313/313 [00:17<00:00, 18.23it/s, loss=1.4188]
Epoch 4 | Training loss: 1.4188, validation accuracy: 0.5098, validation loss: 1.3614
100%|██████████| 313/313 [00:17<00:00, 18.26it/s, loss=1.3305]
Epoch 5 | Training loss: 1.3305, validation accuracy: 0.5462, validation loss: 1.2381
100%|██████████| 313/313 [00:17<00:00, 18.15it/s, loss=1.2491]
Epoch 6 | Training loss: 1.2491, validation accuracy: 0.5891, validation loss: 1.1421
100%|██████████| 313/313 [00:17<00:00, 18.09it/s, loss=1.1814]
Epoch 7 | Training loss: 1.1814, validation

In [33]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.785475, 0.6074533045768737)
Valid: (0.7684, 0.6857794876098633)


In [34]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [35]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.760175, 0.6804563817024231)
Valid: (0.7543, 0.7340531223297119)


In [36]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [37]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.763225, 0.6777015210151672)
Valid: (0.7543, 0.7340531635284424)


In [38]:
optimizer.param_groups[0]["lr"] = 1e-2

In [39]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.90it/s, loss=0.5531]
Epoch 1 | Training loss: 0.5531, validation accuracy: 0.7909, validation loss: 0.6157
100%|██████████| 313/313 [00:17<00:00, 18.23it/s, loss=0.5213]
Epoch 2 | Training loss: 0.5213, validation accuracy: 0.7947, validation loss: 0.6164
100%|██████████| 313/313 [00:17<00:00, 18.15it/s, loss=0.5106]
Epoch 3 | Training loss: 0.5106, validation accuracy: 0.7959, validation loss: 0.6138
100%|██████████| 313/313 [00:17<00:00, 18.09it/s, loss=0.4985]
Epoch 4 | Training loss: 0.4985, validation accuracy: 0.7975, validation loss: 0.6159
100%|██████████| 313/313 [00:17<00:00, 18.15it/s, loss=0.4899]
Epoch 5 | Training loss: 0.4899, validation accuracy: 0.7978, validation loss: 0.6146
100%|██████████| 313/313 [00:17<00:00, 18.14it/s, loss=0.4832]
Epoch 6 | Training loss: 0.4832, validation accuracy: 0.7960, validation loss: 0.6166
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=0.4788]
Epoch 7 | Training loss: 0.4788, validation

In [40]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.867925, 0.3674719787597656)
Valid: (0.8038, 0.6344386264801025)


In [41]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [42]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8635, 0.3829957002162933)
Valid: (0.7988, 0.653402723121643)


In [43]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [44]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.864425, 0.38156471071243286)
Valid: (0.7988, 0.653402609539032)


In [45]:
optimizer.param_groups[0]["lr"] = 1e-3

In [46]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.88it/s, loss=0.3850]
Epoch 1 | Training loss: 0.3850, validation accuracy: 0.8051, validation loss: 0.6288
100%|██████████| 313/313 [00:17<00:00, 18.07it/s, loss=0.3802]
Epoch 2 | Training loss: 0.3802, validation accuracy: 0.8041, validation loss: 0.6275
100%|██████████| 313/313 [00:17<00:00, 17.98it/s, loss=0.3754]
Epoch 3 | Training loss: 0.3754, validation accuracy: 0.8050, validation loss: 0.6231
100%|██████████| 313/313 [00:17<00:00, 18.14it/s, loss=0.3745]
Epoch 4 | Training loss: 0.3745, validation accuracy: 0.8037, validation loss: 0.6302
100%|██████████| 313/313 [00:17<00:00, 18.29it/s, loss=0.3735]
Epoch 5 | Training loss: 0.3735, validation accuracy: 0.8053, validation loss: 0.6312
100%|██████████| 313/313 [00:17<00:00, 18.22it/s, loss=0.3722]
Epoch 6 | Training loss: 0.3722, validation accuracy: 0.8037, validation loss: 0.6308
100%|██████████| 313/313 [00:17<00:00, 18.26it/s, loss=0.3704]
Epoch 7 | Training loss: 0.3704, validation

In [47]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.881025, 0.334426127910614)
Valid: (0.8071, 0.6370200693130493)


In [48]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [49]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.87975, 0.3409732406616211)
Valid: (0.8055, 0.6415988466262817)


In [50]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [51]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.878375, 0.34059026198387143)
Valid: (0.8055, 0.6415991896629334)


### Lambda = 1e-8

In [52]:
model.load_state_dict(initial_state)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [53]:
trainer.validation(model, dl["valid"])

(0.0973, 2.3487395290374757)

In [54]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
logger = Logger("resnet_log_augmentation.txt.no_resizing")
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [55]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.10it/s, loss=2.3113]
Epoch 1 | Training loss: 2.3113, validation accuracy: 0.3096, validation loss: 1.8844
100%|██████████| 313/313 [00:17<00:00, 18.28it/s, loss=1.7500]
Epoch 2 | Training loss: 1.7500, validation accuracy: 0.3652, validation loss: 1.7059
100%|██████████| 313/313 [00:17<00:00, 18.14it/s, loss=1.5969]
Epoch 3 | Training loss: 1.5969, validation accuracy: 0.4360, validation loss: 1.5107
100%|██████████| 313/313 [00:17<00:00, 18.21it/s, loss=1.4858]
Epoch 4 | Training loss: 1.4858, validation accuracy: 0.5054, validation loss: 1.3616
100%|██████████| 313/313 [00:17<00:00, 18.07it/s, loss=1.3962]
Epoch 5 | Training loss: 1.3962, validation accuracy: 0.5429, validation loss: 1.2553
100%|██████████| 313/313 [00:17<00:00, 18.25it/s, loss=1.3038]
Epoch 6 | Training loss: 1.3038, validation accuracy: 0.5729, validation loss: 1.2011
100%|██████████| 313/313 [00:17<00:00, 18.21it/s, loss=1.2409]
Epoch 7 | Training loss: 1.2409, validation

In [56]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.760675, 0.6755115555763245)
Valid: (0.7521, 0.723914382648468)


In [57]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [58]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7575, 0.708295360660553)
Valid: (0.7488, 0.7826698467254639)


In [59]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [60]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.75705, 0.7037416833877563)
Valid: (0.7488, 0.7826688014984131)


In [61]:
optimizer.param_groups[0]["lr"] = 1e-2

In [62]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.71it/s, loss=0.5803]
Epoch 1 | Training loss: 0.5803, validation accuracy: 0.7849, validation loss: 0.6298
100%|██████████| 313/313 [00:17<00:00, 17.94it/s, loss=0.5492]
Epoch 2 | Training loss: 0.5492, validation accuracy: 0.7854, validation loss: 0.6556
100%|██████████| 313/313 [00:17<00:00, 18.11it/s, loss=0.5355]
Epoch 3 | Training loss: 0.5355, validation accuracy: 0.7891, validation loss: 0.6525
100%|██████████| 313/313 [00:17<00:00, 18.15it/s, loss=0.5298]
Epoch 4 | Training loss: 0.5298, validation accuracy: 0.7893, validation loss: 0.6358
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=0.5210]
Epoch 5 | Training loss: 0.5210, validation accuracy: 0.7905, validation loss: 0.6450
100%|██████████| 313/313 [00:17<00:00, 17.99it/s, loss=0.5128]
Epoch 6 | Training loss: 0.5128, validation accuracy: 0.7958, validation loss: 0.6171
100%|██████████| 313/313 [00:17<00:00, 18.07it/s, loss=0.5078]
Epoch 7 | Training loss: 0.5078, validation

In [63]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.857825, 0.4000112359523773)
Valid: (0.8007, 0.622536738872528)


In [64]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [65]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.853325, 0.41404710788726806)
Valid: (0.7991, 0.6360378209114075)


In [66]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [67]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.854675, 0.4123635341644287)
Valid: (0.7991, 0.6360385458946228)


In [68]:
optimizer.param_groups[0]["lr"] = 1e-3

In [69]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.74it/s, loss=0.4175]
Epoch 1 | Training loss: 0.4175, validation accuracy: 0.8040, validation loss: 0.6268
100%|██████████| 313/313 [00:17<00:00, 18.12it/s, loss=0.4129]
Epoch 2 | Training loss: 0.4129, validation accuracy: 0.8044, validation loss: 0.6232
100%|██████████| 313/313 [00:17<00:00, 18.12it/s, loss=0.4101]
Epoch 3 | Training loss: 0.4101, validation accuracy: 0.8034, validation loss: 0.6224
100%|██████████| 313/313 [00:17<00:00, 18.06it/s, loss=0.4070]
Epoch 4 | Training loss: 0.4070, validation accuracy: 0.8012, validation loss: 0.6441
100%|██████████| 313/313 [00:17<00:00, 18.14it/s, loss=0.4026]
Epoch 5 | Training loss: 0.4026, validation accuracy: 0.7981, validation loss: 0.6761
100%|██████████| 313/313 [00:17<00:00, 17.99it/s, loss=0.4021]
Epoch 6 | Training loss: 0.4021, validation accuracy: 0.8037, validation loss: 0.6283
100%|██████████| 313/313 [00:17<00:00, 18.16it/s, loss=0.4066]
Epoch 7 | Training loss: 0.4066, validation

In [70]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.869075, 0.366251750087738)
Valid: (0.8035, 0.6334590641975403)


In [71]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [72]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86805, 0.3686671387195587)
Valid: (0.8042, 0.6360383505821228)


In [73]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [74]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.869825, 0.36821668281555175)
Valid: (0.8042, 0.6360386254310608)


### Lambda = 1e-4

In [75]:
model.load_state_dict(initial_state)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [76]:
trainer.validation(model, dl["valid"])

(0.0973, 2.3487395290374757)

In [77]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-4)
logger = Logger("resnet_log_augmentation-lambda=1e-4.txt.no_resizing")
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [78]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.89it/s, loss=2.3087]
Epoch 1 | Training loss: 2.3087, validation accuracy: 0.3071, validation loss: 1.7720
100%|██████████| 313/313 [00:17<00:00, 18.13it/s, loss=1.7281]
Epoch 2 | Training loss: 1.7281, validation accuracy: 0.4158, validation loss: 1.5458
100%|██████████| 313/313 [00:17<00:00, 18.21it/s, loss=1.5676]
Epoch 3 | Training loss: 1.5676, validation accuracy: 0.4621, validation loss: 1.4762
100%|██████████| 313/313 [00:17<00:00, 18.22it/s, loss=1.4510]
Epoch 4 | Training loss: 1.4510, validation accuracy: 0.5209, validation loss: 1.3295
100%|██████████| 313/313 [00:17<00:00, 18.09it/s, loss=1.3685]
Epoch 5 | Training loss: 1.3685, validation accuracy: 0.5536, validation loss: 1.2403
100%|██████████| 313/313 [00:17<00:00, 18.05it/s, loss=1.2706]
Epoch 6 | Training loss: 1.2706, validation accuracy: 0.5834, validation loss: 1.1773
100%|██████████| 313/313 [00:17<00:00, 18.10it/s, loss=1.1999]
Epoch 7 | Training loss: 1.1999, validation

In [79]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7876, 0.6074248841762543)
Valid: (0.7648, 0.69035461769104)


In [80]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [81]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7619, 0.6707688877105713)
Valid: (0.7521, 0.7154559764862061)


In [82]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [83]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7608, 0.6686750583171844)
Valid: (0.7521, 0.7154558208465576)


In [84]:
optimizer.param_groups[0]["lr"] = 1e-2

In [85]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.79it/s, loss=0.5434]
Epoch 1 | Training loss: 0.5434, validation accuracy: 0.7907, validation loss: 0.6282
100%|██████████| 313/313 [00:17<00:00, 17.98it/s, loss=0.5149]
Epoch 2 | Training loss: 0.5149, validation accuracy: 0.7920, validation loss: 0.6223
100%|██████████| 313/313 [00:17<00:00, 18.16it/s, loss=0.5010]
Epoch 3 | Training loss: 0.5010, validation accuracy: 0.7951, validation loss: 0.6233
100%|██████████| 313/313 [00:17<00:00, 18.03it/s, loss=0.4886]
Epoch 4 | Training loss: 0.4886, validation accuracy: 0.7954, validation loss: 0.6177
100%|██████████| 313/313 [00:17<00:00, 17.93it/s, loss=0.4780]
Epoch 5 | Training loss: 0.4780, validation accuracy: 0.7988, validation loss: 0.6155
100%|██████████| 313/313 [00:17<00:00, 18.13it/s, loss=0.4708]
Epoch 6 | Training loss: 0.4708, validation accuracy: 0.7998, validation loss: 0.6143
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=0.4718]
Epoch 7 | Training loss: 0.4718, validation

In [86]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.87295, 0.3574689694404602)
Valid: (0.8018, 0.6419424867630005)


In [87]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [88]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8638, 0.3830557153224945)
Valid: (0.7945, 0.6668204811096191)


In [89]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [90]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86545, 0.3832085563182831)
Valid: (0.7945, 0.6668202116012574)


In [91]:
optimizer.param_groups[0]["lr"] = 1e-3

In [92]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.66it/s, loss=0.3735]
Epoch 1 | Training loss: 0.3735, validation accuracy: 0.8026, validation loss: 0.6315
100%|██████████| 313/313 [00:17<00:00, 18.13it/s, loss=0.3723]
Epoch 2 | Training loss: 0.3723, validation accuracy: 0.8018, validation loss: 0.6351
100%|██████████| 313/313 [00:17<00:00, 18.21it/s, loss=0.3660]
Epoch 3 | Training loss: 0.3660, validation accuracy: 0.8029, validation loss: 0.6355
100%|██████████| 313/313 [00:17<00:00, 18.18it/s, loss=0.3659]
Epoch 4 | Training loss: 0.3659, validation accuracy: 0.8048, validation loss: 0.6394
100%|██████████| 313/313 [00:17<00:00, 18.13it/s, loss=0.3656]
Epoch 5 | Training loss: 0.3656, validation accuracy: 0.8034, validation loss: 0.6370
100%|██████████| 313/313 [00:17<00:00, 18.18it/s, loss=0.3616]
Epoch 6 | Training loss: 0.3616, validation accuracy: 0.8065, validation loss: 0.6426
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=0.3676]
Epoch 7 | Training loss: 0.3676, validation

In [93]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.88415, 0.32355640783309936)
Valid: (0.8044, 0.6536560085296631)


In [94]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [95]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8824, 0.33033378829956056)
Valid: (0.8036, 0.6532367641448975)


In [96]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [97]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.88275, 0.3318396638393402)
Valid: (0.8036, 0.653237061882019)


## Resizing

In [98]:
trainer = Trainer(
    device="cuda:0",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [99]:
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", os.environ["USER"], "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
    target_size=(224, 224),
)



Files already downloaded and verified
Files already downloaded and verified


In [100]:
model.load_state_dict(initial_state)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [101]:
trainer.validation(model, dl["valid"])

(0.1113, 2.5140885375976563)

In [102]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
logger = Logger("resnet_log_augmentation.txt")
torch.manual_seed(2020)

<torch._C.Generator at 0x7f991411b250>

In [103]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=2.2682]
Epoch 1 | Training loss: 2.2682, validation accuracy: 0.2751, validation loss: 1.9231
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=1.7987]
Epoch 2 | Training loss: 1.7987, validation accuracy: 0.3842, validation loss: 1.6462
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=1.5382]
Epoch 3 | Training loss: 1.5382, validation accuracy: 0.4664, validation loss: 1.4636
100%|██████████| 313/313 [02:05<00:00,  2.50it/s, loss=1.3334]
Epoch 4 | Training loss: 1.3334, validation accuracy: 0.5451, validation loss: 1.3767
100%|██████████| 313/313 [02:03<00:00,  2.52it/s, loss=1.1522]
Epoch 5 | Training loss: 1.1522, validation accuracy: 0.6002, validation loss: 1.1925
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.9976]
Epoch 6 | Training loss: 0.9976, validation accuracy: 0.6539, validation loss: 1.0294
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.8722]
Epoch 7 | Training loss: 0.8722, validation

In [104]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.924875, 0.21868382456302643)
Valid: (0.8638, 0.5077923046112061)


In [105]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [106]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.91715, 0.23692127623558046)
Valid: (0.8688, 0.4422642339706421)


In [107]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [108]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.917825, 0.23501521935462952)
Valid: (0.8688, 0.4422644820213318)


In [109]:
optimizer.param_groups[0]["lr"] = 1e-2

In [110]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.1235]
Epoch 1 | Training loss: 0.1235, validation accuracy: 0.8965, validation loss: 0.3596
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0994]
Epoch 2 | Training loss: 0.0994, validation accuracy: 0.8943, validation loss: 0.3705
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0871]
Epoch 3 | Training loss: 0.0871, validation accuracy: 0.8966, validation loss: 0.3676
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0798]
Epoch 4 | Training loss: 0.0798, validation accuracy: 0.8989, validation loss: 0.3720
100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0736]
Epoch 5 | Training loss: 0.0736, validation accuracy: 0.8989, validation loss: 0.3732
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0701]
Epoch 6 | Training loss: 0.0701, validation accuracy: 0.8972, validation loss: 0.3815
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0657]
Epoch 7 | Training loss: 0.0657, validation

In [111]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.992175, 0.026007969420403242)
Valid: (0.898, 0.4474107147216797)


In [112]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [113]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.991075, 0.029470107208192348)
Valid: (0.8976, 0.45620492362976073)


In [114]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [115]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.99135, 0.030331627766788005)
Valid: (0.8976, 0.45620513763427734)


In [116]:
optimizer.param_groups[0]["lr"] = 1e-3

In [117]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [02:04<00:00,  2.51it/s, loss=0.0314]
Epoch 1 | Training loss: 0.0314, validation accuracy: 0.9000, validation loss: 0.4431
100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=0.0310]
Epoch 2 | Training loss: 0.0310, validation accuracy: 0.9005, validation loss: 0.4461
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0283]
Epoch 3 | Training loss: 0.0283, validation accuracy: 0.8995, validation loss: 0.4445
100%|██████████| 313/313 [02:06<00:00,  2.48it/s, loss=0.0276]
Epoch 4 | Training loss: 0.0276, validation accuracy: 0.8994, validation loss: 0.4422
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0284]
Epoch 5 | Training loss: 0.0284, validation accuracy: 0.8994, validation loss: 0.4459
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0282]
Epoch 6 | Training loss: 0.0282, validation accuracy: 0.8997, validation loss: 0.4456
100%|██████████| 313/313 [02:04<00:00,  2.52it/s, loss=0.0272]
Epoch 7 | Training loss: 0.0272, validation

In [118]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994825, 0.01932948977351189)
Valid: (0.9003, 0.44885823802948)


In [119]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [120]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994925, 0.01953197303712368)
Valid: (0.8996, 0.44897826642990113)


In [121]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [122]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.994425, 0.020918183160573246)
Valid: (0.8996, 0.44897833452224734)


In [123]:
exit