In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
# Trainer shared by every experiment in this notebook.
# The runs were pinned to the third GPU ("cuda:2"). Keep that choice when it
# exists, otherwise fall back to the first GPU or the CPU so the notebook
# still runs on machines with fewer devices.
# NOTE(review): the validation loss is built with reduction="sum"; presumably
# Trainer normalises it by the number of samples -- confirm in nn_utils.
trainer = Trainer(
    device=(
        "cuda:2" if torch.cuda.device_count() > 2
        else "cuda:0" if torch.cuda.is_available()
        else "cpu"
    ),
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [3]:
# Build the CIFAR10 loaders; later cells index `dl` by split name
# ("train" / "valid").
# The dataset root is a per-user directory under /tmp. USER is not always
# exported (e.g. inside containers), so fall back to USERNAME and finally to
# a fixed name instead of raising KeyError.
# NOTE(review): `transforms` arrives through `from nn_utils import *`;
# assuming it is torchvision.transforms, the augmentation is a random affine
# with up to 10 degrees rotation, 0.9-1.1 scaling and 20% translation.
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join(
        "/tmp",
        os.environ.get("USER", os.environ.get("USERNAME", "anonymous")),
        "CIFAR",
    ),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Randomly initialised ResNet-18. Calling resnet18() without arguments gives
# untrained weights on every torchvision release, whereas the explicit
# `pretrained=False` keyword is deprecated since torchvision 0.13 and warns.
model = models.resnet18()
# Swap the stock classifier for a 10-class head that emits log-probabilities,
# which is what the NLLLoss objectives given to the trainer expect.
model.fc = nn.Sequential(
    nn.Linear(512, 10),
    nn.LogSoftmax(dim=-1),
)
# Last expression of the cell: moves the model to the trainer's device and
# displays the module structure.
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [5]:
# Baseline score of the untrained model on the validation split. The training
# loops unpack this return value as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1021, 2.308509880065918)

## No momentum

In [6]:
# Optimizer for the no-momentum run: momentum and weight decay are both zero.
# NOTE(review): the second positional argument (1e-1) is presumably the
# initial learning rate -- confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=5,
    momentum=0,
    weight_decay=0,
    lambda_=1e-8,
)
# Every message of this run goes through this logger.
# NOTE(review): the argument looks like the log file name -- confirm in nn_utils.
logger = Logger(
    "resnet_log_augmentation_no_momentum-k=5.txt.no_resizing"
)

In [7]:
epochs = 25

# First stage: per epoch, train once over the training split, let the
# optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:15<00:00, 19.79it/s, loss=1.9727]
Epoch 1 | Training loss: 1.9727, validation accuracy: 0.4042, validation loss: 1.7272
100%|██████████| 313/313 [00:15<00:00, 19.75it/s, loss=1.5302]
Epoch 2 | Training loss: 1.5302, validation accuracy: 0.4483, validation loss: 1.6383
100%|██████████| 313/313 [00:16<00:00, 19.54it/s, loss=1.3817]
Epoch 3 | Training loss: 1.3817, validation accuracy: 0.4814, validation loss: 1.5466
100%|██████████| 313/313 [00:16<00:00, 19.44it/s, loss=1.2727]
Epoch 4 | Training loss: 1.2727, validation accuracy: 0.5681, validation loss: 1.2354
100%|██████████| 313/313 [00:16<00:00, 19.56it/s, loss=1.1858]
Epoch 5 | Training loss: 1.1858, validation accuracy: 0.5325, validation loss: 1.3798
100%|██████████| 313/313 [00:16<00:00, 18.64it/s, loss=1.1245]
Epoch 6 | Training loss: 1.1245, validation accuracy: 0.6204, validation loss: 1.0743
100%|██████████| 313/313 [00:16<00:00, 18.58it/s, loss=1.0724]
Epoch 7 | Training loss: 1.0724, validation

In [8]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.74585, 0.7224090738296509)
Valid: (0.7326, 0.8144819259643554)


In [9]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [10]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.752675, 0.6807467199325562)
Valid: (0.7387, 0.7605802110671998)


In [11]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [12]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.752725, 0.6802547610282897)
Valid: (0.7386, 0.7605391750335694)


In [13]:
# Second stage: set the learning rate of the first parameter group to 1e-2.
optimizer.param_groups[0]["lr"] = 1e-2

In [14]:
epochs = 25

# Second stage: per epoch, train once over the training split, let the
# optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 19.31it/s, loss=0.5646]
Epoch 1 | Training loss: 0.5646, validation accuracy: 0.7816, validation loss: 0.6313
100%|██████████| 313/313 [00:16<00:00, 19.29it/s, loss=0.5370]
Epoch 2 | Training loss: 0.5370, validation accuracy: 0.7861, validation loss: 0.6241
100%|██████████| 313/313 [00:17<00:00, 18.41it/s, loss=0.5222]
Epoch 3 | Training loss: 0.5222, validation accuracy: 0.7860, validation loss: 0.6261
100%|██████████| 313/313 [00:16<00:00, 18.73it/s, loss=0.5172]
Epoch 4 | Training loss: 0.5172, validation accuracy: 0.7854, validation loss: 0.6284
100%|██████████| 313/313 [00:16<00:00, 18.65it/s, loss=0.5074]
Epoch 5 | Training loss: 0.5074, validation accuracy: 0.7876, validation loss: 0.6267
100%|██████████| 313/313 [00:16<00:00, 18.83it/s, loss=0.5021]
Epoch 6 | Training loss: 0.5021, validation accuracy: 0.7875, validation loss: 0.6294
100%|██████████| 313/313 [00:16<00:00, 18.77it/s, loss=0.4979]
Epoch 7 | Training loss: 0.4979, validation

In [15]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8543, 0.40428740539550784)
Valid: (0.7869, 0.6547213036537171)


In [16]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [17]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8566, 0.40107156543731687)
Valid: (0.7912, 0.6553733768463135)


In [18]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [19]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.85745, 0.3997431619167328)
Valid: (0.7912, 0.655369130897522)


In [20]:
# Third stage: set the learning rate of the first parameter group to 1e-3.
optimizer.param_groups[0]["lr"] = 1e-3

In [21]:
epochs = 25

# Third stage: per epoch, train once over the training split, let the
# optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 19.49it/s, loss=0.4252]
Epoch 1 | Training loss: 0.4252, validation accuracy: 0.7927, validation loss: 0.6420
100%|██████████| 313/313 [00:16<00:00, 19.46it/s, loss=0.4226]
Epoch 2 | Training loss: 0.4226, validation accuracy: 0.7920, validation loss: 0.6523
100%|██████████| 313/313 [00:16<00:00, 18.45it/s, loss=0.4178]
Epoch 3 | Training loss: 0.4178, validation accuracy: 0.7928, validation loss: 0.6475
100%|██████████| 313/313 [00:16<00:00, 18.71it/s, loss=0.4178]
Epoch 4 | Training loss: 0.4178, validation accuracy: 0.7936, validation loss: 0.6506
100%|██████████| 313/313 [00:16<00:00, 18.89it/s, loss=0.4149]
Epoch 5 | Training loss: 0.4149, validation accuracy: 0.7918, validation loss: 0.6502
100%|██████████| 313/313 [00:16<00:00, 18.76it/s, loss=0.4171]
Epoch 6 | Training loss: 0.4171, validation accuracy: 0.7939, validation loss: 0.6434
100%|██████████| 313/313 [00:16<00:00, 18.75it/s, loss=0.4116]
Epoch 7 | Training loss: 0.4116, validation

In [22]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.864625, 0.37954831800460814)
Valid: (0.7941, 0.6516498949050903)


In [23]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [24]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.862875, 0.3849262727737427)
Valid: (0.7937, 0.6520042837142944)


In [25]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [26]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86555, 0.3789937041282654)
Valid: (0.7937, 0.6520039552688599)


## Momentum

In [27]:
# Fresh, randomly initialised ResNet-18 for the momentum experiment. Calling
# resnet18() without arguments gives untrained weights on every torchvision
# release, whereas the explicit `pretrained=False` keyword is deprecated since
# torchvision 0.13 and warns.
model = models.resnet18()
# Swap the stock classifier for a 10-class head that emits log-probabilities,
# which is what the NLLLoss objectives given to the trainer expect.
model.fc = nn.Sequential(
    nn.Linear(512, 10),
    nn.LogSoftmax(dim=-1),
)
# Last expression of the cell: moves the model to the trainer's device and
# displays the module structure.
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [28]:
# Baseline score of the re-initialised model on the validation split. The
# training loops unpack this return value as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1023, 2.3524650859832765)

In [29]:
# Optimizer for the momentum run: momentum 0.9 with a small weight decay.
# NOTE(review): the second positional argument (1e-1) is presumably the
# initial learning rate -- confirm against AcceleratedSGD's signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,
    k=5,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
# Every message of this run goes through this logger.
# NOTE(review): the argument looks like the log file name -- confirm in nn_utils.
logger = Logger(
    "resnet_log_augmentation-k=5.txt.no_resizing"
)

In [30]:
epochs = 25

# First stage with momentum: per epoch, train once over the training split,
# let the optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 19.08it/s, loss=2.2447]
Epoch 1 | Training loss: 2.2447, validation accuracy: 0.3455, validation loss: 1.8584
100%|██████████| 313/313 [00:16<00:00, 18.98it/s, loss=1.6953]
Epoch 2 | Training loss: 1.6953, validation accuracy: 0.4205, validation loss: 1.5827
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=1.5429]
Epoch 3 | Training loss: 1.5429, validation accuracy: 0.4701, validation loss: 1.4255
100%|██████████| 313/313 [00:17<00:00, 18.20it/s, loss=1.4211]
Epoch 4 | Training loss: 1.4211, validation accuracy: 0.5176, validation loss: 1.3484
100%|██████████| 313/313 [00:17<00:00, 18.18it/s, loss=1.3248]
Epoch 5 | Training loss: 1.3248, validation accuracy: 0.5567, validation loss: 1.2389
100%|██████████| 313/313 [00:17<00:00, 18.13it/s, loss=1.2586]
Epoch 6 | Training loss: 1.2586, validation accuracy: 0.5738, validation loss: 1.1959
100%|██████████| 313/313 [00:17<00:00, 18.03it/s, loss=1.2068]
Epoch 7 | Training loss: 1.2068, validation

In [31]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.773775, 0.6439791312694549)
Valid: (0.7534, 0.7247883583068848)


In [32]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [33]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.778025, 0.6237394191741943)
Valid: (0.7702, 0.6659030452728272)


In [34]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [35]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.780625, 0.6175967395305634)
Valid: (0.7702, 0.6658548971176147)


In [36]:
# Second stage: set the learning rate of the first parameter group to 1e-2.
optimizer.param_groups[0]["lr"] = 1e-2

In [None]:
epochs = 25

# Second stage with momentum: per epoch, train once over the training split,
# let the optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 18.62it/s, loss=0.5521]
Epoch 1 | Training loss: 0.5521, validation accuracy: 0.7948, validation loss: 0.6214
100%|██████████| 313/313 [00:16<00:00, 18.77it/s, loss=0.5223]
Epoch 2 | Training loss: 0.5223, validation accuracy: 0.7976, validation loss: 0.6180
100%|██████████| 313/313 [00:17<00:00, 18.02it/s, loss=0.5073]
Epoch 3 | Training loss: 0.5073, validation accuracy: 0.7972, validation loss: 0.6190
100%|██████████| 313/313 [00:17<00:00, 18.20it/s, loss=0.4956]
Epoch 4 | Training loss: 0.4956, validation accuracy: 0.7974, validation loss: 0.6171
100%|██████████| 313/313 [00:17<00:00, 18.17it/s, loss=0.4866]
Epoch 5 | Training loss: 0.4866, validation accuracy: 0.7974, validation loss: 0.6254
100%|██████████| 313/313 [00:17<00:00, 18.26it/s, loss=0.4864]
Epoch 6 | Training loss: 0.4864, validation accuracy: 0.7992, validation loss: 0.6174
100%|██████████| 313/313 [00:17<00:00, 18.20it/s, loss=0.4770]
Epoch 7 | Training loss: 0.4770, validation

In [None]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
# Third stage: set the learning rate of the first parameter group to 1e-3.
optimizer.param_groups[0]["lr"] = 1e-3

In [None]:
epochs = 25

# Third stage with momentum: per epoch, train once over the training split,
# let the optimizer close the epoch, validate, and log a one-line summary.
for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

In [None]:
# Score the trained model on both splits, then record each result. A score is
# the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
# Select the "RNA" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
# Deep copy of the current model; its parameters are handed to
# store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
# Select the "RRE" method on the first parameter group and note it in the log.
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
# Start again from a fresh deep copy of the current model.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes the extrapolated weights and
# store_parameters() writes them into the given parameter groups -- confirm
# against nn_extrapolation.AcceleratedSGD. Statement order is kept as is.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
# Put the `model_acc` copy on the trainer's device, score it on both splits,
# then log each (accuracy, loss) pair.
model_acc.to(trainer.device)
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)