In [1]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [2]:
trainer = Trainer(
    device="cuda:3",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [3]:
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", os.environ["USER"], "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
def make_model():
    model = models.vgg16(pretrained=False)
    model.classifier[6] = nn.Linear(4096, 10)
    model.classifier.add_module("7", nn.LogSoftmax(-1))
    return model

## Momentum

### k = 10

In [5]:
model = make_model()
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
optimizer = AcceleratedSGD(model.parameters(), 1e-2, k=10, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
loss_fn = nn.NLLLoss()
logger = Logger("vgg_log_augmentation-early_stopping.txt.no_resizing")

In [7]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:38<00:00,  8.19it/s, loss=2.1387]
Epoch 1 | Training loss: 2.1387, validation accuracy: 0.2612, validation loss: 1.8824
100%|██████████| 313/313 [00:40<00:00,  7.65it/s, loss=1.7732]
Epoch 2 | Training loss: 1.7732, validation accuracy: 0.3634, validation loss: 1.6652
100%|██████████| 313/313 [00:40<00:00,  7.65it/s, loss=1.5721]
Epoch 3 | Training loss: 1.5721, validation accuracy: 0.4611, validation loss: 1.4284
100%|██████████| 313/313 [00:41<00:00,  7.57it/s, loss=1.4057]
Epoch 4 | Training loss: 1.4057, validation accuracy: 0.5345, validation loss: 1.2588
100%|██████████| 313/313 [00:41<00:00,  7.58it/s, loss=1.2655]
Epoch 5 | Training loss: 1.2655, validation accuracy: 0.6087, validation loss: 1.0851
100%|██████████| 313/313 [00:41<00:00,  7.53it/s, loss=1.1579]
Epoch 6 | Training loss: 1.1579, validation accuracy: 0.6664, validation loss: 0.9315
100%|██████████| 313/313 [00:41<00:00,  7.53it/s, loss=1.0544]
Epoch 7 | Training loss: 1.0544, validation

In [8]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.943925, 0.1638738555431366)
Valid: (0.8526, 0.5123599355697632)


In [9]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [11]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()

Train: (0.9749, 0.08244056740403176)
Valid: (0.8773, 0.4064589352607727)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [12]:
optimizer.param_groups[0]["lr"] = 1e-3

In [13]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:40<00:00,  7.77it/s, loss=0.1085]
Epoch 1 | Training loss: 0.1085, validation accuracy: 0.8704, validation loss: 0.4927
100%|██████████| 313/313 [00:40<00:00,  7.64it/s, loss=0.0811]
Epoch 2 | Training loss: 0.0811, validation accuracy: 0.8731, validation loss: 0.5133
100%|██████████| 313/313 [00:40<00:00,  7.68it/s, loss=0.0739]
Epoch 3 | Training loss: 0.0739, validation accuracy: 0.8733, validation loss: 0.5249
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=0.0704]
Epoch 4 | Training loss: 0.0704, validation accuracy: 0.8736, validation loss: 0.5259
100%|██████████| 313/313 [00:40<00:00,  7.75it/s, loss=0.0669]
Epoch 5 | Training loss: 0.0669, validation accuracy: 0.8745, validation loss: 0.5268
100%|██████████| 313/313 [00:40<00:00,  7.74it/s, loss=0.0638]
Epoch 6 | Training loss: 0.0638, validation accuracy: 0.8763, validation loss: 0.5392
100%|██████████| 313/313 [00:40<00:00,  7.77it/s, loss=0.0605]
Epoch 7 | Training loss: 0.0605, validation

In [14]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.984375, 0.045997010950744155)
Valid: (0.8749, 0.5790577341079712)


In [15]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [16]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()

Train: (0.98495, 0.0459450534760952)
Valid: (0.8763, 0.5519839563369751)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [17]:
optimizer.param_groups[0]["lr"] = 1e-4

In [18]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:40<00:00,  7.72it/s, loss=0.0471]
Epoch 1 | Training loss: 0.0471, validation accuracy: 0.8744, validation loss: 0.5774
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=0.0422]
Epoch 2 | Training loss: 0.0422, validation accuracy: 0.8747, validation loss: 0.5780
100%|██████████| 313/313 [00:40<00:00,  7.75it/s, loss=0.0441]
Epoch 3 | Training loss: 0.0441, validation accuracy: 0.8752, validation loss: 0.5766
100%|██████████| 313/313 [00:40<00:00,  7.75it/s, loss=0.0448]
Epoch 4 | Training loss: 0.0448, validation accuracy: 0.8755, validation loss: 0.5780
100%|██████████| 313/313 [00:40<00:00,  7.77it/s, loss=0.0414]
Epoch 5 | Training loss: 0.0414, validation accuracy: 0.8757, validation loss: 0.5777
100%|██████████| 313/313 [00:40<00:00,  7.75it/s, loss=0.0414]
Epoch 6 | Training loss: 0.0414, validation accuracy: 0.8753, validation loss: 0.5788
100%|██████████| 313/313 [00:40<00:00,  7.79it/s, loss=0.0419]
Epoch 7 | Training loss: 0.0419, validation

In [19]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.987675, 0.03698642331808805)
Valid: (0.8763, 0.5884119760513306)


In [20]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [21]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()

Train: (0.9879, 0.036089356358349325)
Valid: (0.8759, 0.5817425794601441)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

### k = 5

In [5]:
model = make_model()
model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
optimizer = AcceleratedSGD(model.parameters(), 1e-2, k=5, momentum=0.9, weight_decay=1e-5, lambda_=1e-8)
loss_fn = nn.NLLLoss()
logger = Logger("vgg_log_augmentation-early_stopping-k=5.txt.no_resizing")

In [7]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:39<00:00,  7.93it/s, loss=2.0934]
Epoch 1 | Training loss: 2.0934, validation accuracy: 0.2971, validation loss: 1.8501
100%|██████████| 313/313 [00:41<00:00,  7.59it/s, loss=1.7694]
Epoch 2 | Training loss: 1.7694, validation accuracy: 0.3995, validation loss: 1.5711
100%|██████████| 313/313 [00:41<00:00,  7.50it/s, loss=1.5754]
Epoch 3 | Training loss: 1.5754, validation accuracy: 0.4543, validation loss: 1.5082
100%|██████████| 313/313 [00:41<00:00,  7.50it/s, loss=1.4168]
Epoch 4 | Training loss: 1.4168, validation accuracy: 0.5308, validation loss: 1.2675
100%|██████████| 313/313 [00:41<00:00,  7.49it/s, loss=1.2777]
Epoch 5 | Training loss: 1.2777, validation accuracy: 0.5652, validation loss: 1.2029
100%|██████████| 313/313 [00:41<00:00,  7.46it/s, loss=1.1473]
Epoch 6 | Training loss: 1.1473, validation accuracy: 0.6262, validation loss: 1.0277
100%|██████████| 313/313 [00:41<00:00,  7.46it/s, loss=1.0495]
Epoch 7 | Training loss: 1.0495, validation

In [8]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9398, 0.17853410050868987)
Valid: (0.8522, 0.5539824739456177)


In [9]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [10]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()
None

Train: (0.969075, 0.09283070743083954)
Valid: (0.875, 0.4566187557220459)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [11]:
optimizer.param_groups[0]["lr"] = 1e-3

In [12]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:42<00:00,  7.45it/s, loss=0.1055]
Epoch 1 | Training loss: 0.1055, validation accuracy: 0.8744, validation loss: 0.5145
100%|██████████| 313/313 [00:42<00:00,  7.42it/s, loss=0.0825]
Epoch 2 | Training loss: 0.0825, validation accuracy: 0.8729, validation loss: 0.5198
100%|██████████| 313/313 [00:42<00:00,  7.43it/s, loss=0.0765]
Epoch 3 | Training loss: 0.0765, validation accuracy: 0.8748, validation loss: 0.5279
100%|██████████| 313/313 [00:42<00:00,  7.44it/s, loss=0.0730]
Epoch 4 | Training loss: 0.0730, validation accuracy: 0.8724, validation loss: 0.5431
100%|██████████| 313/313 [00:41<00:00,  7.49it/s, loss=0.0653]
Epoch 5 | Training loss: 0.0653, validation accuracy: 0.8740, validation loss: 0.5584
100%|██████████| 313/313 [00:41<00:00,  7.50it/s, loss=0.0630]
Epoch 6 | Training loss: 0.0630, validation accuracy: 0.8745, validation loss: 0.5520
100%|██████████| 313/313 [00:42<00:00,  7.44it/s, loss=0.0615]
Epoch 7 | Training loss: 0.0615, validation

In [13]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.984925, 0.044134528724104165)
Valid: (0.8755, 0.5896578651428223)


In [14]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [15]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()

Train: (0.984525, 0.04439684483408928)
Valid: (0.8764, 0.588962692451477)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [16]:
optimizer.param_groups[0]["lr"] = 1e-4

In [17]:
max_epochs = 300
early_stopping = EarlyStopping(15)

for epoch in range(max_epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    logger.log("Epoch", epoch+1, "|", 
          f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")
    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 313/313 [00:42<00:00,  7.41it/s, loss=0.0465]
Epoch 1 | Training loss: 0.0465, validation accuracy: 0.8766, validation loss: 0.5856
100%|██████████| 313/313 [00:42<00:00,  7.38it/s, loss=0.0446]
Epoch 2 | Training loss: 0.0446, validation accuracy: 0.8755, validation loss: 0.5887
100%|██████████| 313/313 [00:42<00:00,  7.38it/s, loss=0.0422]
Epoch 3 | Training loss: 0.0422, validation accuracy: 0.8760, validation loss: 0.5898
100%|██████████| 313/313 [00:42<00:00,  7.40it/s, loss=0.0419]
Epoch 4 | Training loss: 0.0419, validation accuracy: 0.8766, validation loss: 0.5896
100%|██████████| 313/313 [00:42<00:00,  7.38it/s, loss=0.0424]
Epoch 5 | Training loss: 0.0424, validation accuracy: 0.8768, validation loss: 0.5904
100%|██████████| 313/313 [00:42<00:00,  7.39it/s, loss=0.0434]
Epoch 6 | Training loss: 0.0434, validation accuracy: 0.8761, validation loss: 0.5901
100%|██████████| 313/313 [00:42<00:00,  7.40it/s, loss=0.0435]
Epoch 7 | Training loss: 0.0435, validation

In [18]:
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.987025, 0.03983512137830257)
Valid: (0.8772, 0.5955881801605225)


In [19]:
model_acc = deepcopy(model)
optimizer.accelerate()
optimizer.store_parameters([model_acc.parameters()])

In [20]:
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
model_acc.cpu()
None

Train: (0.985925, 0.04124355467408895)
Valid: (0.8765, 0.5935525712966919)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1