In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
# Sanity check that PyTorch can see a CUDA device; the trainer below is placed on a GPU.
torch.cuda.is_available()

True

In [3]:
# Prefer GPU index 2 (the device this experiment was run on), but do not fail on
# machines that expose fewer GPUs: fall back to the first GPU, then to the CPU.
_preferred_gpu = 2
if torch.cuda.device_count() > _preferred_gpu:
    _device = f"cuda:{_preferred_gpu}"
elif torch.cuda.is_available():
    _device = "cuda:0"
else:
    _device = "cpu"

# Training uses the batch-mean NLL loss; validation uses the summed NLL loss.
# NOTE(review): presumably the trainer divides the summed validation loss by the
# number of samples (reported validation losses are per-sample sized) — confirm in nn_utils.
trainer = Trainer(
    device=_device,
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [4]:
# Per-user download/cache directory under /tmp. USER is not guaranteed to be set
# (e.g. containers, Windows), so fall back instead of raising KeyError.
_user = os.environ.get("USER") or os.environ.get("USERNAME") or "shared"

# CIFAR-10 loaders, later indexed as dl["train"] and dl["valid"].
# The augmentation is a random affine transform: up to 10 degrees of rotation,
# scaling in [0.9, 1.1] and translation of up to 20% of the image size per axis.
# NOTE(review): presumably validation_split=0.2 holds out 20% of the training set
# for dl["valid"] — confirm against nn_utils.load_dataset.
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", _user, "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Randomly initialised ResNet-18 (no pretrained weights).
# NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=None`;
# left unchanged because the installed torchvision version is not visible from here.
model = models.resnet18(pretrained=False)
# Replace the stock classification head with a 10-class head for CIFAR-10.
# The input width is read from the existing layer rather than hard-coding 512,
# so the cell keeps working if the backbone is swapped for one with wider features.
# LogSoftmax is needed because the trainer uses nn.NLLLoss, which expects log-probabilities.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 10),
    nn.LogSoftmax(-1)
)
# Move the model to the trainer's device; as the last expression this also displays the module.
model.to(trainer.device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
# Baseline before any training: evaluate the freshly initialised model on the validation split.
# The displayed tuple is unpacked as (accuracy, loss) by the training loops in this notebook.
trainer.validation(model, dl["valid"])

(0.1011, 2.362632771682739)

## Momentum

In [7]:
# Same baseline evaluation as the cell before the "Momentum" heading; no training has run in between.
# NOTE(review): this cell duplicates the previous one and looks safe to remove — confirm.
trainer.validation(model, dl["valid"])

(0.1011, 2.362632771682739)

In [8]:
# Project-specific SGD variant that also supports extrapolation ("acceleration") of the weights.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-1,  # presumably the initial learning rate; later cells overwrite param_groups[0]["lr"] — confirm
    k=10,  # NOTE(review): looks like the number of stored iterates used for extrapolation — confirm
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,  # NOTE(review): presumably a regularisation constant of the extrapolation — confirm
)
# All training progress and scores below are written through this logger.
logger = Logger("resnet_log_augmentation-Levin.txt.no_resizing")

In [9]:
# Phase 1: 25 epochs with the optimizer's initial settings.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # NOTE(review): presumably records the end-of-epoch weights for later extrapolation — confirm.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    # Epoch numbers are 1-based and restart at 1 in every training phase.
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 19.22it/s, loss=2.3289]
Epoch 1 | Training loss: 2.3289, validation accuracy: 0.3279, validation loss: 1.8093
100%|██████████| 313/313 [00:16<00:00, 18.85it/s, loss=1.7030]
Epoch 2 | Training loss: 1.7030, validation accuracy: 0.4034, validation loss: 1.6126
100%|██████████| 313/313 [00:16<00:00, 18.90it/s, loss=1.5559]
Epoch 3 | Training loss: 1.5559, validation accuracy: 0.4533, validation loss: 1.4690
100%|██████████| 313/313 [00:16<00:00, 18.68it/s, loss=1.4419]
Epoch 4 | Training loss: 1.4419, validation accuracy: 0.5197, validation loss: 1.3347
100%|██████████| 313/313 [00:16<00:00, 18.64it/s, loss=1.3487]
Epoch 5 | Training loss: 1.3487, validation accuracy: 0.5348, validation loss: 1.3002
100%|██████████| 313/313 [00:17<00:00, 17.98it/s, loss=1.2752]
Epoch 6 | Training loss: 1.2752, validation accuracy: 0.5674, validation loss: 1.2280
100%|██████████| 313/313 [00:17<00:00, 18.19it/s, loss=1.1946]
Epoch 7 | Training loss: 1.1946, validation

In [10]:
# Score the trained (non-accelerated) model on both splits after phase 1, then log the results.
# NOTE(review): `dl` was built with a RandomAffine augmentation; whether it is also applied
# when iterating dl["train"] here is not visible from this notebook — confirm.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.783175, 0.6164686933517456)
Valid: (0.7685, 0.6864338821411132)


In [11]:
# Select the "Levin:t" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:t"
logger.log("Levin:t")
# Independent copy of the trained model; its parameters are what store_parameters() receives below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights from the stored iterates
# and store_parameters() writes them into the given parameter lists — confirm in nn_extrapolation.
# NOTE(review): deepcopy also copies buffers (e.g. BatchNorm running statistics), which
# `parameters()` does not include, so those presumably stay as in `model` — confirm this is intended.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:t


In [12]:
# Evaluate the "Levin:t" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.682125, 0.9185433156013488)
Valid: (0.6863, 0.9745683364868164)


In [13]:
# Select the "Levin:u" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:u"
logger.log("Levin:u")
# Fresh copy of the trained model; this rebinds `model_acc`, replacing any earlier accelerated copy.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:u


In [14]:
# Evaluate the "Levin:u" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.665175, 0.9735346799850464)
Valid: (0.6696, 1.0341466751098634)


In [15]:
# Select the "Levin:v" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:v"
logger.log("Levin:v")
# Fresh copy of the trained model; this rebinds `model_acc`, replacing any earlier accelerated copy.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:v


In [16]:
# Evaluate the "Levin:v" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.691575, 0.8899491945266723)
Valid: (0.7084, 0.8646075942993164)


In [17]:
# Manual learning-rate step before the second training phase: lr of the first parameter group becomes 1e-2.
# NOTE(review): only param_groups[0] is updated; presumably it is the only group, since the
# optimizer was built from a single model.parameters() iterable — confirm.
optimizer.param_groups[0]["lr"] = 1e-2

In [18]:
# Phase 2: another 25 epochs on the same model and optimizer, run after lr was set to 1e-2.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # NOTE(review): presumably records the end-of-epoch weights for later extrapolation — confirm.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    # Epoch numbers are 1-based and restart at 1 in every training phase.
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 18.48it/s, loss=0.5527]
Epoch 1 | Training loss: 0.5527, validation accuracy: 0.7915, validation loss: 0.6249
100%|██████████| 313/313 [00:16<00:00, 18.47it/s, loss=0.5282]
Epoch 2 | Training loss: 0.5282, validation accuracy: 0.7943, validation loss: 0.6112
100%|██████████| 313/313 [00:18<00:00, 17.23it/s, loss=0.5205]
Epoch 3 | Training loss: 0.5205, validation accuracy: 0.7947, validation loss: 0.6125
100%|██████████| 313/313 [00:17<00:00, 17.45it/s, loss=0.5064]
Epoch 4 | Training loss: 0.5064, validation accuracy: 0.7920, validation loss: 0.6161
100%|██████████| 313/313 [00:17<00:00, 17.85it/s, loss=0.5046]
Epoch 5 | Training loss: 0.5046, validation accuracy: 0.7934, validation loss: 0.6174
100%|██████████| 313/313 [00:17<00:00, 17.76it/s, loss=0.4989]
Epoch 6 | Training loss: 0.4989, validation accuracy: 0.7921, validation loss: 0.6119
100%|██████████| 313/313 [00:17<00:00, 17.86it/s, loss=0.4862]
Epoch 7 | Training loss: 0.4862, validation

In [19]:
# Score the trained (non-accelerated) model on both splits after phase 2, then log the results.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.86205, 0.387502751159668)
Valid: (0.8043, 0.6181156715393067)


In [20]:
# Select the "Levin:t" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:t"
logger.log("Levin:t")
# Fresh copy of the model as trained so far; its parameters are passed to store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:t


In [21]:
# Evaluate the "Levin:t" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.84955, 0.4213036576271057)
Valid: (0.7926, 0.6668689551353455)


In [22]:
# Select the "Levin:u" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:u"
logger.log("Levin:u")
# Fresh copy of the model as trained so far; this rebinds `model_acc`.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:u


In [23]:
# Evaluate the "Levin:u" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.846425, 0.4293213541507721)
Valid: (0.7895, 0.6737623242378235)


In [24]:
# Select the "Levin:v" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:v"
logger.log("Levin:v")
# Fresh copy of the model as trained so far; this rebinds `model_acc`.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:v


In [25]:
# Evaluate the "Levin:v" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.82605, 0.4924284327507019)
Valid: (0.7803, 0.7121404245376587)


In [26]:
# Manual learning-rate step before the third training phase: lr of the first parameter group becomes 1e-3.
# NOTE(review): only param_groups[0] is updated; presumably it is the only group, since the
# optimizer was built from a single model.parameters() iterable — confirm.
optimizer.param_groups[0]["lr"] = 1e-3

In [27]:
# Phase 3: another 25 epochs on the same model and optimizer, run after lr was set to 1e-3.
epochs = 25

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    # NOTE(review): presumably records the end-of-epoch weights for later extrapolation — confirm.
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    # Epoch numbers are 1-based and restart at 1 in every training phase.
    logger.log(
        "Epoch", epoch + 1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:16<00:00, 18.43it/s, loss=0.3987]
Epoch 1 | Training loss: 0.3987, validation accuracy: 0.7993, validation loss: 0.6134
100%|██████████| 313/313 [00:17<00:00, 18.09it/s, loss=0.3940]
Epoch 2 | Training loss: 0.3940, validation accuracy: 0.8022, validation loss: 0.6113
100%|██████████| 313/313 [00:18<00:00, 17.13it/s, loss=0.3913]
Epoch 3 | Training loss: 0.3913, validation accuracy: 0.8026, validation loss: 0.6111
100%|██████████| 313/313 [00:18<00:00, 17.21it/s, loss=0.3881]
Epoch 4 | Training loss: 0.3881, validation accuracy: 0.8020, validation loss: 0.6149
100%|██████████| 313/313 [00:18<00:00, 17.20it/s, loss=0.3881]
Epoch 5 | Training loss: 0.3881, validation accuracy: 0.8035, validation loss: 0.6139
100%|██████████| 313/313 [00:18<00:00, 17.34it/s, loss=0.3878]
Epoch 6 | Training loss: 0.3878, validation accuracy: 0.8031, validation loss: 0.6196
100%|██████████| 313/313 [00:17<00:00, 17.41it/s, loss=0.3891]
Epoch 7 | Training loss: 0.3891, validation

In [28]:
# Score the trained (non-accelerated) model on both splits after phase 3, then log the results.
train_score, valid_score = [
    trainer.validation(model, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.873375, 0.35541710109710695)
Valid: (0.8021, 0.6185395000457764)


In [29]:
# Select the "Levin:t" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:t"
logger.log("Levin:t")
# Fresh copy of the model as trained so far; its parameters are passed to store_parameters() below.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:t


In [30]:
# Evaluate the "Levin:t" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.852425, 0.42726387667655946)
Valid: (0.783, 0.6935818186759949)


In [31]:
# Select the "Levin:u" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:u"
logger.log("Levin:u")
# Fresh copy of the model as trained so far; this rebinds `model_acc`.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:u


In [32]:
# Evaluate the "Levin:u" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.84935, 0.4336830590724945)
Valid: (0.782, 0.699140240573883)


In [33]:
# Select the "Levin:v" variant on the first parameter group and record the choice in the log.
optimizer.param_groups[0]["method"] = "Levin:v"
logger.log("Levin:v")
# Fresh copy of the model as trained so far; this rebinds `model_acc`.
model_acc = deepcopy(model)
# NOTE(review): presumably accelerate() computes extrapolated weights with the selected method
# and store_parameters() writes them into the copy's parameters — confirm in nn_extrapolation.
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

Levin:v


In [34]:
# Evaluate the "Levin:v" accelerated copy on both splits and log the scores.
model_acc.to(trainer.device)
train_score, valid_score = [
    trainer.validation(model_acc, dl[split]) for split in ("train", "valid")
]
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.87265, 0.35981161918640137)
Valid: (0.802, 0.62165378408432)
