In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
# Device selection: this experiment was run on GPU 3 of a multi-GPU host.
# Keep that as the preferred device, but fall back to the first GPU (or the
# CPU) so the notebook does not break on machines where "cuda:3" does not exist.
PREFERRED_GPU_INDEX = 3
if torch.cuda.is_available():
    if PREFERRED_GPU_INDEX < torch.cuda.device_count():
        trainer_device = f"cuda:{PREFERRED_GPU_INDEX}"
    else:
        trainer_device = "cuda:0"
else:
    # NOTE(review): assumes Trainer accepts any torch device string - confirm.
    trainer_device = "cpu"

# The model ends in LogSoftmax, so NLLLoss is the matching criterion.
# Training uses the per-batch mean; validation uses the per-batch sum
# (presumably normalised by the dataset size inside Trainer - TODO confirm).
trainer = Trainer(
    device=trainer_device,
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [3]:
# Per-user dataset cache directory under /tmp. USER is not guaranteed to be
# set (e.g. in containers or on Windows), so fall back to USERNAME and then to
# a fixed folder name instead of raising KeyError. When USER is set the
# resulting path is exactly the one used originally.
cifar_cache_owner = os.environ.get("USER", os.environ.get("USERNAME", "shared"))

# CIFAR-10 loaders with a light affine augmentation (rotation up to 10 degrees,
# 0.9-1.1 scaling, translation up to 20% of the image size) and 20% of the
# data held out for validation. The rest of the notebook indexes the result
# with the "train" and "valid" keys.
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", cifar_cache_owner, "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# VGG-16 trained from scratch. Random initialisation is torchvision's default,
# so no argument is passed: the `pretrained=` keyword is deprecated in recent
# torchvision releases (replaced by `weights=`), and omitting it gives the
# same model on both old and new versions without a deprecation warning.
model = models.vgg16()

# Swap the ImageNet head for a 10-class head (CIFAR-10). The input width is
# read from the layer being replaced rather than hard-coded.
head_in_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(head_in_features, 10)

# Emit log-probabilities so the network pairs with the NLLLoss criteria
# configured on the trainer.
model.classifier.add_module("7", nn.LogSoftmax(-1))

# Move to the trainer's device; as the last expression this also displays
# the module structure in the cell output.
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [5]:
# Pre-training sanity check on the validation split. The training loop
# unpacks this call's result as (accuracy, loss). The recorded output
# (0.0948, 2.3035) matches chance level for 10 classes
# (accuracy ~ 0.1, loss ~ ln(10) ~ 2.303), as expected for untrained weights.
trainer.validation(model, dl["valid"])

(0.0948, 2.3034510635375978)

## Adam baseline (momentum-based adaptive optimizer)

In [6]:
# Adam over all model parameters: learning rate 1e-4, L2 weight decay 1e-5.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

# Text log for this run; overwrite=True is passed so a previous log with the
# same name is not kept (NOTE(review): exact Logger semantics live in nn_utils).
logger = Logger(
    "vgg_log_augmentation_adam.txt.no_resizing",
    overwrite=True,
)

In [None]:
# Number of full passes over the training split.
epochs = 70

for epoch in range(epochs):
    # One optimisation pass, then evaluation on the held-out split.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Epochs are reported 1-based; the adjacent f-strings concatenate into a
    # single message argument.
    logger.log(
        "Epoch",
        epoch + 1,
        "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 313/313 [00:48<00:00,  6.46it/s, loss=1.9859]
Epoch 1 | Training loss: 1.9859, validation accuracy: 0.3323, validation loss: 1.7165
100%|██████████| 313/313 [00:49<00:00,  6.34it/s, loss=1.6262]
Epoch 2 | Training loss: 1.6262, validation accuracy: 0.4460, validation loss: 1.4727
100%|██████████| 313/313 [00:49<00:00,  6.37it/s, loss=1.4558]
Epoch 3 | Training loss: 1.4558, validation accuracy: 0.5186, validation loss: 1.3291
100%|██████████| 313/313 [00:48<00:00,  6.40it/s, loss=1.3276]
Epoch 4 | Training loss: 1.3276, validation accuracy: 0.5634, validation loss: 1.2200
100%|██████████| 313/313 [00:48<00:00,  6.40it/s, loss=1.2236]
Epoch 5 | Training loss: 1.2236, validation accuracy: 0.5971, validation loss: 1.1213
100%|██████████| 313/313 [00:48<00:00,  6.42it/s, loss=1.1431]
Epoch 6 | Training loss: 1.1431, validation accuracy: 0.6312, validation loss: 1.0330
100%|██████████| 313/313 [00:48<00:00,  6.40it/s, loss=1.0720]
Epoch 7 | Training loss: 1.0720, validation

In [None]:
# Final evaluation of the trained model on both splits (train first, then
# valid), each returning the same tuple that trainer.validation produced above.
# NOTE(review): if dl["train"] applies the RandomAffine augmentation, the
# train score is measured on augmented samples - confirm in nn_utils.
train_score, valid_score = (
    trainer.validation(model, dl[split_name]) for split_name in ("train", "valid")
)

logger.log("Train:", train_score)
logger.log("Valid:", valid_score)