In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
torch.cuda.is_available()

True

In [3]:
trainer = Trainer(
    device="cuda:1",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [4]:
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", os.environ["USER"], "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /tmp/i291318/CIFAR/cifar-10-python.tar.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting /tmp/i291318/CIFAR/cifar-10-python.tar.gz to /tmp/i291318/CIFAR
Files already downloaded and verified


In [5]:
model = models.resnet18(pretrained=False)
model.fc = nn.Sequential(
    nn.Linear(512, 10),
    nn.LogSoftmax(-1)
)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
trainer.validation(model, dl["valid"])

(0.1037, 2.3724672771453856)

## No momentum

In [8]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0, weight_decay=0, lambda_=1e-8)
logger = Logger("resnet_log_augmentation_no_momentum.txt.no_resizing")

In [9]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 20.08it/s, loss=1.9728]
Epoch 1 | Training loss: 1.9728, validation accuracy: 0.3371, validation loss: 2.0809
100%|██████████| 313/313 [00:15<00:00, 20.15it/s, loss=1.5676]
Epoch 2 | Training loss: 1.5676, validation accuracy: 0.4210, validation loss: 1.6793
100%|██████████| 313/313 [00:15<00:00, 20.24it/s, loss=1.4200]
Epoch 3 | Training loss: 1.4200, validation accuracy: 0.2880, validation loss: 17.0430
100%|██████████| 313/313 [00:15<00:00, 19.92it/s, loss=1.3180]
Epoch 4 | Training loss: 1.3180, validation accuracy: 0.5294, validation loss: 1.3956
100%|██████████| 313/313 [00:15<00:00, 19.87it/s, loss=1.2365]
Epoch 5 | Training loss: 1.2365, validation accuracy: 0.5612, validation loss: 1.2363
100%|██████████| 313/313 [00:16<00:00, 18.47it/s, loss=1.1589]
Epoch 6 | Training loss: 1.1589, validation accuracy: 0.5830, validation loss: 1.2227
100%|██████████| 313/313 [00:16<00:00, 18.81it/s, loss=1.1012]
Epoch 7 | Training loss: 1.1012, validatio

In [10]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.749, 0.7053908762931824)
Valid: (0.7365, 0.783669065284729)


In [11]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [12]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.76385, 0.667782802772522)
Valid: (0.7438, 0.7377233875274658)


In [13]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [14]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.76435, 0.6702670343399048)
Valid: (0.7438, 0.737723217010498)


In [15]:
optimizer.param_groups[0]["lr"] = 1e-2

In [16]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 19.98it/s, loss=0.5692]
Epoch 1 | Training loss: 0.5692, validation accuracy: 0.7871, validation loss: 0.6320
100%|██████████| 313/313 [00:16<00:00, 19.50it/s, loss=0.5473]
Epoch 2 | Training loss: 0.5473, validation accuracy: 0.7873, validation loss: 0.6365
100%|██████████| 313/313 [00:16<00:00, 18.61it/s, loss=0.5356]
Epoch 3 | Training loss: 0.5356, validation accuracy: 0.7913, validation loss: 0.6223
100%|██████████| 313/313 [00:17<00:00, 18.29it/s, loss=0.5277]
Epoch 4 | Training loss: 0.5277, validation accuracy: 0.7914, validation loss: 0.6272
100%|██████████| 313/313 [00:16<00:00, 18.83it/s, loss=0.5252]
Epoch 5 | Training loss: 0.5252, validation accuracy: 0.7896, validation loss: 0.6259
100%|██████████| 313/313 [00:16<00:00, 18.86it/s, loss=0.5145]
Epoch 6 | Training loss: 0.5145, validation accuracy: 0.7943, validation loss: 0.6232
100%|██████████| 313/313 [00:16<00:00, 19.01it/s, loss=0.5090]
Epoch 7 | Training loss: 0.5090, validation

In [17]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.851125, 0.4204368348121643)
Valid: (0.7902, 0.6467131391525268)


In [18]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [19]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.847475, 0.42944177560806274)
Valid: (0.794, 0.6401141977310181)


In [20]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [21]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.845625, 0.4281534000873566)
Valid: (0.794, 0.6401142517089844)


In [22]:
optimizer.param_groups[0]["lr"] = 1e-3

In [23]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 19.60it/s, loss=0.4405]
Epoch 1 | Training loss: 0.4405, validation accuracy: 0.7940, validation loss: 0.6423
100%|██████████| 313/313 [00:16<00:00, 19.49it/s, loss=0.4425]
Epoch 2 | Training loss: 0.4425, validation accuracy: 0.7944, validation loss: 0.6417
100%|██████████| 313/313 [00:16<00:00, 18.82it/s, loss=0.4348]
Epoch 3 | Training loss: 0.4348, validation accuracy: 0.7948, validation loss: 0.6376
100%|██████████| 313/313 [00:16<00:00, 18.61it/s, loss=0.4305]
Epoch 4 | Training loss: 0.4305, validation accuracy: 0.7942, validation loss: 0.6453
100%|██████████| 313/313 [00:16<00:00, 18.67it/s, loss=0.4302]
Epoch 5 | Training loss: 0.4302, validation accuracy: 0.7942, validation loss: 0.6378
100%|██████████| 313/313 [00:16<00:00, 18.99it/s, loss=0.4263]
Epoch 6 | Training loss: 0.4263, validation accuracy: 0.7948, validation loss: 0.6390
100%|██████████| 313/313 [00:16<00:00, 19.06it/s, loss=0.4272]
Epoch 7 | Training loss: 0.4272, validation

In [24]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.85995, 0.3970328094959259)
Valid: (0.7946, 0.648416905784607)


In [25]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [26]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.858975, 0.3964641182422638)
Valid: (0.7931, 0.6520758222579957)


In [27]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [28]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.8588, 0.4000325141429901)
Valid: (0.7931, 0.6520759426116943)


## Momentum

In [29]:
model = models.resnet18(pretrained=False)
model.fc = nn.Sequential(
    nn.Linear(512, 10),
    nn.LogSoftmax(-1)
)
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [30]:
trainer.validation(model, dl["valid"])

(0.1034, 2.329251049041748)

In [31]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
logger = Logger("resnet_log_augmentation.txt.no_resizing")

In [32]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.25it/s, loss=2.1537]
Epoch 1 | Training loss: 2.1537, validation accuracy: 0.3719, validation loss: 1.6579
100%|██████████| 313/313 [00:16<00:00, 19.17it/s, loss=1.6446]
Epoch 2 | Training loss: 1.6446, validation accuracy: 0.4567, validation loss: 1.4957
100%|██████████| 313/313 [00:17<00:00, 18.14it/s, loss=1.4941]
Epoch 3 | Training loss: 1.4941, validation accuracy: 0.5202, validation loss: 1.3188
100%|██████████| 313/313 [00:17<00:00, 18.15it/s, loss=1.3914]
Epoch 4 | Training loss: 1.3914, validation accuracy: 0.5409, validation loss: 1.2601
100%|██████████| 313/313 [00:17<00:00, 18.06it/s, loss=1.3214]
Epoch 5 | Training loss: 1.3214, validation accuracy: 0.5516, validation loss: 1.2529
100%|██████████| 313/313 [00:17<00:00, 18.31it/s, loss=1.2366]
Epoch 6 | Training loss: 1.2366, validation accuracy: 0.5984, validation loss: 1.1251
100%|██████████| 313/313 [00:17<00:00, 18.37it/s, loss=1.1640]
Epoch 7 | Training loss: 1.1640, validation

In [33]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.788175, 0.598724176979065)
Valid: (0.7761, 0.6909784210205078)


In [34]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [35]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.751275, 0.6853015835285187)
Valid: (0.7527, 0.732500690460205)


In [36]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [37]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7518, 0.6865542938232422)
Valid: (0.7527, 0.732499766921997)


In [38]:
optimizer.param_groups[0]["lr"] = 1e-2

In [39]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 19.03it/s, loss=0.5266]
Epoch 1 | Training loss: 0.5266, validation accuracy: 0.7982, validation loss: 0.6189
100%|██████████| 313/313 [00:16<00:00, 18.93it/s, loss=0.4928]
Epoch 2 | Training loss: 0.4928, validation accuracy: 0.8053, validation loss: 0.6048
100%|██████████| 313/313 [00:17<00:00, 18.04it/s, loss=0.4811]
Epoch 3 | Training loss: 0.4811, validation accuracy: 0.8067, validation loss: 0.6067
100%|██████████| 313/313 [00:17<00:00, 18.27it/s, loss=0.4727]
Epoch 4 | Training loss: 0.4727, validation accuracy: 0.8091, validation loss: 0.5968
100%|██████████| 313/313 [00:17<00:00, 17.87it/s, loss=0.4598]
Epoch 5 | Training loss: 0.4598, validation accuracy: 0.8031, validation loss: 0.6186
100%|██████████| 313/313 [00:17<00:00, 18.37it/s, loss=0.4572]
Epoch 6 | Training loss: 0.4572, validation accuracy: 0.8095, validation loss: 0.5945
100%|██████████| 313/313 [00:17<00:00, 18.40it/s, loss=0.4482]
Epoch 7 | Training loss: 0.4482, validation

In [40]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.876525, 0.3425853633403778)
Valid: (0.8098, 0.6122154071807862)


In [41]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [42]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.87355, 0.36096229751110076)
Valid: (0.8034, 0.6426097635269165)


In [43]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [44]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86925, 0.3699721306800842)
Valid: (0.8034, 0.6426091287612915)


In [45]:
optimizer.param_groups[0]["lr"] = 1e-3

In [None]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:16<00:00, 18.89it/s, loss=0.3606]
Epoch 1 | Training loss: 0.3606, validation accuracy: 0.8096, validation loss: 0.6133
100%|██████████| 313/313 [00:16<00:00, 18.81it/s, loss=0.3525]
Epoch 2 | Training loss: 0.3525, validation accuracy: 0.8102, validation loss: 0.6112
100%|██████████| 313/313 [00:17<00:00, 18.06it/s, loss=0.3561]
Epoch 3 | Training loss: 0.3561, validation accuracy: 0.8122, validation loss: 0.6078
 44%|████▍     | 139/313 [00:07<00:08, 19.51it/s, loss=0.3539]

In [None]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

In [None]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [None]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)