In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
torch.cuda.is_available()

True

In [3]:
trainer = Trainer(
    device="cuda:1",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [4]:
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", os.environ["USER"], "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
model = models.resnet18(pretrained=False)

In [30]:
def replace_bn(model, **kwargs):
    to_replace = []
    for name, _ in model.named_children():
        if name.startswith("bn"):
            to_replace.append(name)
    for name in to_replace:
        setattr(model, name, nn.Dropout(**kwargs))
        
def make_model():
    model = models.resnet18(pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(512, 10),
        nn.LogSoftmax(-1)
    )
    replace_bn(model, p=0.2)
    for name in ["layer1", "layer2", "layer3", "layer4"]:
        layer = getattr(model, name)
        replace_bn(layer[0], p=0.2)
        replace_bn(layer[1], p=0.2)
        try:
            replace_bn(layer[0].downsample, p=0.2)
        except AttributeError:
            pass
    return model

In [31]:
model = make_model()
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): Dropout(p=0.2, inplace=False)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): Dropout(p=0.2, inplace=False)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): Dropout(p=0.2, inplace=False)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): Dropout(p=0.2, inplace=False)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): Dropout(p=0.2, inplace=False)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv

In [32]:
trainer.validation(model, dl["valid"])

(0.0975, 2.338772444152832)

## No momentum

In [33]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0, weight_decay=0, lambda_=1e-8)
logger = Logger("resnet_log_augmentation_dropout_no_momentum.txt.no_resizing")

In [34]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:14<00:00, 21.48it/s, loss=1.9946]
Epoch 1 | Training loss: 1.9946, validation accuracy: 0.2292, validation loss: 2.1591
100%|██████████| 313/313 [00:14<00:00, 21.20it/s, loss=1.7706]
Epoch 2 | Training loss: 1.7706, validation accuracy: 0.2815, validation loss: 2.0057
100%|██████████| 313/313 [00:14<00:00, 21.10it/s, loss=1.6507]
Epoch 3 | Training loss: 1.6507, validation accuracy: 0.4238, validation loss: 1.5625
100%|██████████| 313/313 [00:14<00:00, 20.95it/s, loss=1.5684]
Epoch 4 | Training loss: 1.5684, validation accuracy: 0.4422, validation loss: 1.5212
100%|██████████| 313/313 [00:15<00:00, 20.72it/s, loss=1.5042]
Epoch 5 | Training loss: 1.5042, validation accuracy: 0.4477, validation loss: 1.6121
100%|██████████| 313/313 [00:15<00:00, 20.40it/s, loss=1.4600]
Epoch 6 | Training loss: 1.4600, validation accuracy: 0.4480, validation loss: 1.6679
100%|██████████| 313/313 [00:15<00:00, 20.48it/s, loss=1.4098]
Epoch 7 | Training loss: 1.4098, validation

In [35]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.643025, 1.0297035776138306)
Valid: (0.6581, 0.9965353843688964)


In [36]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [37]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.678625, 0.9105850963592529)
Valid: (0.692, 0.8713434221267701)


In [38]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [39]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.6749, 0.9116784420967102)
Valid: (0.692, 0.8713433927536011)


In [40]:
optimizer.param_groups[0]["lr"] = 1e-2

In [41]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:14<00:00, 21.24it/s, loss=0.9246]
Epoch 1 | Training loss: 0.9246, validation accuracy: 0.7055, validation loss: 0.8556
100%|██████████| 313/313 [00:15<00:00, 20.83it/s, loss=0.9122]
Epoch 2 | Training loss: 0.9122, validation accuracy: 0.7049, validation loss: 0.8542
100%|██████████| 313/313 [00:15<00:00, 20.56it/s, loss=0.9023]
Epoch 3 | Training loss: 0.9023, validation accuracy: 0.7032, validation loss: 0.8557
100%|██████████| 313/313 [00:15<00:00, 20.04it/s, loss=0.8999]
Epoch 4 | Training loss: 0.8999, validation accuracy: 0.7111, validation loss: 0.8377
100%|██████████| 313/313 [00:15<00:00, 20.42it/s, loss=0.8961]
Epoch 5 | Training loss: 0.8961, validation accuracy: 0.7120, validation loss: 0.8312
100%|██████████| 313/313 [00:15<00:00, 20.21it/s, loss=0.8880]
Epoch 6 | Training loss: 0.8880, validation accuracy: 0.7169, validation loss: 0.8194
100%|██████████| 313/313 [00:15<00:00, 20.44it/s, loss=0.8892]
Epoch 7 | Training loss: 0.8892, validation

In [42]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.724175, 0.784323591709137)
Valid: (0.73, 0.7811445056915283)


In [43]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [44]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.716925, 0.8059598308563233)
Valid: (0.7255, 0.7975503099441529)


In [45]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [46]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.714475, 0.8085428611755371)
Valid: (0.7255, 0.7975505010604859)


In [47]:
optimizer.param_groups[0]["lr"] = 1e-3

In [48]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 19.63it/s, loss=0.8381]
Epoch 1 | Training loss: 0.8381, validation accuracy: 0.7268, validation loss: 0.7875
100%|██████████| 313/313 [00:17<00:00, 18.05it/s, loss=0.8351]
Epoch 2 | Training loss: 0.8351, validation accuracy: 0.7259, validation loss: 0.7880
100%|██████████| 313/313 [00:14<00:00, 21.34it/s, loss=0.8404]
Epoch 3 | Training loss: 0.8404, validation accuracy: 0.7283, validation loss: 0.7833
100%|██████████| 313/313 [00:15<00:00, 20.00it/s, loss=0.8330]
Epoch 4 | Training loss: 0.8330, validation accuracy: 0.7277, validation loss: 0.7855
100%|██████████| 313/313 [00:15<00:00, 20.26it/s, loss=0.8390]
Epoch 5 | Training loss: 0.8390, validation accuracy: 0.7290, validation loss: 0.7828
100%|██████████| 313/313 [00:15<00:00, 20.29it/s, loss=0.8369]
Epoch 6 | Training loss: 0.8369, validation accuracy: 0.7268, validation loss: 0.7908
100%|██████████| 313/313 [00:15<00:00, 20.27it/s, loss=0.8297]
Epoch 7 | Training loss: 0.8297, validation

In [49]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.725725, 0.7799556538581848)
Valid: (0.7292, 0.7883560613632202)


In [50]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [51]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.7245, 0.7831740236282348)
Valid: (0.7276, 0.791721588897705)


In [52]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [53]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.72275, 0.789233359336853)
Valid: (0.7276, 0.7917215955734253)


## Momentum

In [54]:
model = make_model()
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): Dropout(p=0.2, inplace=False)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): Dropout(p=0.2, inplace=False)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): Dropout(p=0.2, inplace=False)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): Dropout(p=0.2, inplace=False)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): Dropout(p=0.2, inplace=False)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv

In [55]:
trainer.validation(model, dl["valid"])

(0.1004, 2.3497232345581054)

In [56]:
optimizer = AcceleratedSGD(model.parameters(), 1e-1, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
logger = Logger("resnet_log_augmentation_dropout.txt.no_resizing")

In [57]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 20.82it/s, loss=2.0669]
Epoch 1 | Training loss: 2.0669, validation accuracy: 0.2770, validation loss: 1.9935
100%|██████████| 313/313 [00:15<00:00, 20.70it/s, loss=1.8792]
Epoch 2 | Training loss: 1.8792, validation accuracy: 0.3178, validation loss: 1.8271
100%|██████████| 313/313 [00:15<00:00, 20.60it/s, loss=1.7621]
Epoch 3 | Training loss: 1.7621, validation accuracy: 0.3626, validation loss: 1.6871
100%|██████████| 313/313 [00:15<00:00, 20.31it/s, loss=1.6733]
Epoch 4 | Training loss: 1.6733, validation accuracy: 0.4206, validation loss: 1.5620
100%|██████████| 313/313 [00:15<00:00, 20.09it/s, loss=1.6111]
Epoch 5 | Training loss: 1.6111, validation accuracy: 0.4448, validation loss: 1.5225
100%|██████████| 313/313 [00:15<00:00, 20.31it/s, loss=1.5456]
Epoch 6 | Training loss: 1.5456, validation accuracy: 0.4608, validation loss: 1.5300
100%|██████████| 313/313 [00:15<00:00, 20.27it/s, loss=1.4924]
Epoch 7 | Training loss: 1.4924, validation

In [58]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.654, 0.9836876014709472)
Valid: (0.6739, 0.9303822853088379)


In [59]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [60]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.6217, 1.0990787353515625)
Valid: (0.6528, 1.0022801303863524)


In [61]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [62]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.622075, 1.0969384763717651)
Valid: (0.6528, 1.0022829542160034)


In [63]:
optimizer.param_groups[0]["lr"] = 1e-2

In [64]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 20.64it/s, loss=0.9516]
Epoch 1 | Training loss: 0.9516, validation accuracy: 0.7059, validation loss: 0.8490
100%|██████████| 313/313 [00:15<00:00, 20.47it/s, loss=0.9323]
Epoch 2 | Training loss: 0.9323, validation accuracy: 0.7088, validation loss: 0.8376
100%|██████████| 313/313 [00:15<00:00, 20.45it/s, loss=0.9242]
Epoch 3 | Training loss: 0.9242, validation accuracy: 0.7073, validation loss: 0.8434
100%|██████████| 313/313 [00:15<00:00, 20.45it/s, loss=0.9242]
Epoch 4 | Training loss: 0.9242, validation accuracy: 0.7051, validation loss: 0.8524
100%|██████████| 313/313 [00:15<00:00, 20.10it/s, loss=0.9106]
Epoch 5 | Training loss: 0.9106, validation accuracy: 0.7082, validation loss: 0.8339
100%|██████████| 313/313 [00:15<00:00, 20.29it/s, loss=0.9062]
Epoch 6 | Training loss: 0.9062, validation accuracy: 0.7128, validation loss: 0.8269
100%|██████████| 313/313 [00:15<00:00, 20.50it/s, loss=0.8990]
Epoch 7 | Training loss: 0.8990, validation

In [65]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.72105, 0.796857222366333)
Valid: (0.7262, 0.7920450061798096)


In [66]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [67]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.71915, 0.8034924876213074)
Valid: (0.7258, 0.7971745235443115)


In [68]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [69]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.719075, 0.8057135431289673)
Valid: (0.7258, 0.7971746562957763)


In [70]:
optimizer.param_groups[0]["lr"] = 1e-3

In [71]:
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

100%|██████████| 313/313 [00:15<00:00, 20.85it/s, loss=0.8477]
Epoch 1 | Training loss: 0.8477, validation accuracy: 0.7284, validation loss: 0.7861
100%|██████████| 313/313 [00:15<00:00, 20.50it/s, loss=0.8412]
Epoch 2 | Training loss: 0.8412, validation accuracy: 0.7286, validation loss: 0.7859
100%|██████████| 313/313 [00:15<00:00, 20.33it/s, loss=0.8387]
Epoch 3 | Training loss: 0.8387, validation accuracy: 0.7306, validation loss: 0.7801
100%|██████████| 313/313 [00:15<00:00, 20.30it/s, loss=0.8410]
Epoch 4 | Training loss: 0.8410, validation accuracy: 0.7285, validation loss: 0.7821
100%|██████████| 313/313 [00:15<00:00, 20.25it/s, loss=0.8404]
Epoch 5 | Training loss: 0.8404, validation accuracy: 0.7274, validation loss: 0.7850
100%|██████████| 313/313 [00:15<00:00, 20.33it/s, loss=0.8343]
Epoch 6 | Training loss: 0.8343, validation accuracy: 0.7274, validation loss: 0.7849
100%|██████████| 313/313 [00:15<00:00, 20.25it/s, loss=0.8362]
Epoch 7 | Training loss: 0.8362, validation

In [72]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.719625, 0.792577197265625)
Valid: (0.7309, 0.7789380920410156)


In [73]:
optimizer.param_groups[0]["method"] = "RNA"
logger.log("RNA")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RNA


In [74]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.722125, 0.7873786575317383)
Valid: (0.7319, 0.7732045806884765)


In [75]:
optimizer.param_groups[0]["method"] = "RRE"
logger.log("RRE")
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

RRE


In [76]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.726125, 0.7809732400894165)
Valid: (0.7319, 0.773204490852356)


In [77]:
exit