In [1]:
import sys
# Put the directory two levels above this notebook at the front of the module
# search path. Presumably this is where `nn_extrapolation` and `nn_utils`
# (imported in the next cell) live -- confirm against the repository layout.
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
import numpy as np
from copy import deepcopy

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Sanity check: report whether PyTorch can see a CUDA device. The Trainer
# created below is configured with device="cuda".
torch.cuda.is_available()

True

In [5]:
# Training/evaluation helper shared by every cell below.
# The device is chosen from the CUDA availability probe instead of being
# hard-coded, so the notebook still runs (slowly) on a machine without a GPU.
trainer = Trainer(
    device="cuda" if torch.cuda.is_available() else "cpu",
    # The model ends in LogSoftmax, so NLLLoss is the matching criterion.
    # Training loss is averaged over each mini-batch.
    loss_fn=nn.NLLLoss(reduction="mean"),
    # Validation loss is summed per batch; presumably Trainer.validation
    # divides by the number of samples afterwards -- TODO confirm in nn_utils.
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [22]:
# Load MNIST from a local copy (no download) and build the data loaders.
# The result is indexed as dl["train"] and dl["valid"] by the cells below.
# NOTE(review): no RNG seed is set anywhere before this call; if the
# validation split or the shuffling is random, results will differ between
# runs -- confirm and consider seeding in a config cell.
dl = load_dataset(
    dataset="mnist", 
    # Path is relative to the notebook's working directory.
    root="../../../MNIST", 
    download=False, 
    # Hold out 20% of the training data for validation.
    validation_split=0.2,
    batch_size=64, 
    num_workers=2,
)

In [23]:
def make_model():
    """Build a fresh, randomly initialised CNN classifier.

    The network is two convolutional stages (two 3x3 convolutions with ReLU,
    then 2x2 max-pooling) followed by a two-layer fully connected head that
    outputs log-probabilities over 10 classes.

    The first linear layer expects 4*4*64 features, which is what the
    convolutional stages produce for 1x28x28 inputs:
    28 -> 26 -> 24 -> pool 12 -> 10 -> 8 -> pool 4.

    Returns:
        nn.Sequential: the untrained model (on the default device), ending in
        LogSoftmax so it pairs with nn.NLLLoss.
    """
    # Stage 1: 1 -> 32 -> 32 channels, then halve the spatial size.
    stage_one = [
        nn.Conv2d(1, 32, 3),
        nn.ReLU(),
        nn.Conv2d(32, 32, 3),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]
    # Stage 2: 32 -> 64 -> 64 channels, then halve the spatial size again.
    stage_two = [
        nn.Conv2d(32, 64, 3),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]
    # Classifier head: flatten, hidden layer, 10-way log-softmax output.
    head = [
        nn.Flatten(),
        nn.Linear(4*4*64, 128),
        nn.ReLU(),
        nn.Linear(128, 10),
        nn.LogSoftmax(-1),
    ]
    # Layers are created and registered in the same order as before, so
    # state-dict keys ("0.weight", ...) are unchanged.
    return nn.Sequential(*stage_one, *stage_two, *head)

In [24]:
# Create the network and move it to the trainer's device.
# Module.to() returns the module, so this last expression also makes the cell
# display the architecture.
model = make_model()
model.to(trainer.device)

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (6): ReLU()
  (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (8): ReLU()
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Flatten(start_dim=1, end_dim=-1)
  (11): Linear(in_features=1024, out_features=128, bias=True)
  (12): ReLU()
  (13): Linear(in_features=128, out_features=10, bias=True)
  (14): LogSoftmax(dim=-1)
)

In [25]:
# SGD with momentum 0.5 and weight decay 1e-5, wrapped by the project's
# acceleration scheme in "epoch" mode.
# NOTE(review): the positional 1e-3 is presumably the learning rate, and k=10
# presumably the number of stored iterates used for extrapolation -- confirm
# against AcceleratedSGD in nn_extrapolation.
optimizer = AcceleratedSGD(model.parameters(), 1e-3, k=10, momentum=0.5, weight_decay=1e-5, mode="epoch")
# All logger.log(...) calls below go through this logger, constructed with
# the file name "SGD_momentum-short-cont.txt".
logger = Logger("SGD_momentum-short-cont.txt")

## Epoch

In [26]:
# Initial training phase.
epochs = 20

for epoch in range(epochs):
    # One full pass over the training loader, then signal the optimizer
    # that the epoch is complete.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()

    # Evaluate on the held-out split; validation() yields (accuracy, loss).
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Adjacent f-string literals concatenate into the same single message.
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 750/750 [00:12<00:00, 58.76it/s, loss=2.2987]
Epoch 1 | Training loss: 2.2987, validation accuracy: 0.1209, validation loss: 2.2932
100%|██████████| 750/750 [00:12<00:00, 58.95it/s, loss=2.2761]
Epoch 2 | Training loss: 2.2761, validation accuracy: 0.3397, validation loss: 2.2350
100%|██████████| 750/750 [00:12<00:00, 58.59it/s, loss=1.3827]
Epoch 3 | Training loss: 1.3827, validation accuracy: 0.8638, validation loss: 0.4631
100%|██████████| 750/750 [00:12<00:00, 58.19it/s, loss=0.3929]
Epoch 4 | Training loss: 0.3929, validation accuracy: 0.9178, validation loss: 0.2847
100%|██████████| 750/750 [00:12<00:00, 58.78it/s, loss=0.2789]
Epoch 5 | Training loss: 0.2789, validation accuracy: 0.9354, validation loss: 0.2176
100%|██████████| 750/750 [00:12<00:00, 58.83it/s, loss=0.2123]
Epoch 6 | Training loss: 0.2123, validation accuracy: 0.9452, validation loss: 0.1856
100%|██████████| 750/750 [00:13<00:00, 57.02it/s, loss=0.1692]
Epoch 7 | Training loss: 0.1692, validation

In [29]:
# Score the current (non-accelerated) model on both splits.
# trainer.validation returns an (accuracy, loss) pair, so each logged tuple
# reads (accuracy, loss).
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9850416666666667, 0.05008074772513161)
Valid: (0.9798333333333333, 0.06808133846521378)


In [30]:
%%time
# Time the acceleration step on its own.
# NOTE(review): presumably this computes extrapolated parameters from the
# iterates recorded by finish_epoch() -- confirm in nn_extrapolation.
optimizer.accelerate()

CPU times: user 234 ms, sys: 20.6 ms, total: 254 ms
Wall time: 111 ms


In [34]:
# Evaluate the accelerated parameters on a copy so that `model` itself keeps
# training from its own, unmodified weights.
model_acc = deepcopy(model)
# NOTE(review): presumably writes the optimizer's accelerated parameters into
# the copy; the argument is a list of parameter iterators -- confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters([model_acc.parameters()])
# Use the trainer's device (as done for `model`) instead of a bare .cuda(),
# which would pick the default CUDA device regardless of trainer.device.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
# Move the copy off the accelerator once it has been scored.
model_acc.cpu()
# Trailing None keeps the cell from echoing the module repr.
None

Train: (0.9863958333333334, 0.04579724781960249)
Valid: (0.9806666666666667, 0.06394591045286506)


In [35]:
# Continue training the same model/optimizer for another block of epochs.
# The logged epoch number restarts at 1 for this block.
epochs = 10

for epoch in range(epochs):
    # One full pass over the training loader, then signal the optimizer
    # that the epoch is complete.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()

    # Evaluate on the held-out split; validation() yields (accuracy, loss).
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Adjacent f-string literals concatenate into the same single message.
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 750/750 [00:12<00:00, 57.85it/s, loss=0.0542]
Epoch 1 | Training loss: 0.0542, validation accuracy: 0.9797, validation loss: 0.0671
100%|██████████| 750/750 [00:12<00:00, 58.25it/s, loss=0.0516]
Epoch 2 | Training loss: 0.0516, validation accuracy: 0.9793, validation loss: 0.0675
100%|██████████| 750/750 [00:12<00:00, 58.59it/s, loss=0.0499]
Epoch 3 | Training loss: 0.0499, validation accuracy: 0.9806, validation loss: 0.0642
100%|██████████| 750/750 [00:12<00:00, 58.59it/s, loss=0.0480]
Epoch 4 | Training loss: 0.0480, validation accuracy: 0.9793, validation loss: 0.0662
100%|██████████| 750/750 [00:13<00:00, 56.47it/s, loss=0.0464]
Epoch 5 | Training loss: 0.0464, validation accuracy: 0.9783, validation loss: 0.0724
100%|██████████| 750/750 [00:13<00:00, 56.99it/s, loss=0.0443]
Epoch 6 | Training loss: 0.0443, validation accuracy: 0.9812, validation loss: 0.0625
100%|██████████| 750/750 [00:13<00:00, 56.57it/s, loss=0.0423]
Epoch 7 | Training loss: 0.0423, validation

In [36]:
# Score the current (non-accelerated) model on both splits.
# trainer.validation returns an (accuracy, loss) pair, so each logged tuple
# reads (accuracy, loss).
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9895833333333334, 0.035504481708320476)
Valid: (0.98125, 0.06216497187719991)


In [37]:
%%time
# Time the acceleration step on its own.
# NOTE(review): presumably this computes extrapolated parameters from the
# iterates recorded by finish_epoch() -- confirm in nn_extrapolation.
optimizer.accelerate()

CPU times: user 209 ms, sys: 3.57 ms, total: 212 ms
Wall time: 81.8 ms


In [38]:
# Evaluate the accelerated parameters on a copy so that `model` itself keeps
# training from its own, unmodified weights.
model_acc = deepcopy(model)
# NOTE(review): presumably writes the optimizer's accelerated parameters into
# the copy; the argument is a list of parameter iterators -- confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters([model_acc.parameters()])
# Use the trainer's device (as done for `model`) instead of a bare .cuda(),
# which would pick the default CUDA device regardless of trainer.device.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
# Move the copy off the accelerator once it has been scored.
model_acc.cpu()
# Trailing None keeps the cell from echoing the module repr.
None

Train: (0.9901458333333333, 0.03358460598718375)
Valid: (0.9815, 0.05974481407242516)


In [39]:
# Continue training the same model/optimizer for another block of epochs.
# The logged epoch number restarts at 1 for this block.
epochs = 10

for epoch in range(epochs):
    # One full pass over the training loader, then signal the optimizer
    # that the epoch is complete.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()

    # Evaluate on the held-out split; validation() yields (accuracy, loss).
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Adjacent f-string literals concatenate into the same single message.
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 750/750 [00:13<00:00, 57.26it/s, loss=0.0373]
Epoch 1 | Training loss: 0.0373, validation accuracy: 0.9820, validation loss: 0.0610
100%|██████████| 750/750 [00:12<00:00, 58.22it/s, loss=0.0360]
Epoch 2 | Training loss: 0.0360, validation accuracy: 0.9824, validation loss: 0.0626
100%|██████████| 750/750 [00:12<00:00, 58.50it/s, loss=0.0350]
Epoch 3 | Training loss: 0.0350, validation accuracy: 0.9817, validation loss: 0.0641
100%|██████████| 750/750 [00:13<00:00, 57.65it/s, loss=0.0330]
Epoch 4 | Training loss: 0.0330, validation accuracy: 0.9827, validation loss: 0.0607
100%|██████████| 750/750 [00:12<00:00, 58.24it/s, loss=0.0325]
Epoch 5 | Training loss: 0.0325, validation accuracy: 0.9827, validation loss: 0.0600
100%|██████████| 750/750 [00:13<00:00, 57.31it/s, loss=0.0316]
Epoch 6 | Training loss: 0.0316, validation accuracy: 0.9832, validation loss: 0.0573
100%|██████████| 750/750 [00:12<00:00, 58.21it/s, loss=0.0306]
Epoch 7 | Training loss: 0.0306, validation

In [40]:
# Score the current (non-accelerated) model on both splits.
# trainer.validation returns an (accuracy, loss) pair, so each logged tuple
# reads (accuracy, loss).
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9910833333333333, 0.02918727777626676)
Valid: (0.9808333333333333, 0.06308799447724596)


In [41]:
%%time
# Time the acceleration step on its own.
# NOTE(review): presumably this computes extrapolated parameters from the
# iterates recorded by finish_epoch() -- confirm in nn_extrapolation.
optimizer.accelerate()

CPU times: user 378 ms, sys: 2.76 ms, total: 380 ms
Wall time: 146 ms


In [42]:
# Evaluate the accelerated parameters on a copy so that `model` itself keeps
# training from its own, unmodified weights.
model_acc = deepcopy(model)
# NOTE(review): presumably writes the optimizer's accelerated parameters into
# the copy; the argument is a list of parameter iterators -- confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters([model_acc.parameters()])
# Use the trainer's device (as done for `model`) instead of a bare .cuda(),
# which would pick the default CUDA device regardless of trainer.device.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
# Move the copy off the accelerator once it has been scored.
model_acc.cpu()
# Trailing None keeps the cell from echoing the module repr.
None

Train: (0.9930416666666667, 0.024613907935951525)
Valid: (0.98375, 0.05598679292652135)


In [43]:
# Continue training the same model/optimizer for another block of epochs.
# The logged epoch number restarts at 1 for this block.
epochs = 10

for epoch in range(epochs):
    # One full pass over the training loader, then signal the optimizer
    # that the epoch is complete.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()

    # Evaluate on the held-out split; validation() yields (accuracy, loss).
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Adjacent f-string literals concatenate into the same single message.
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 750/750 [00:12<00:00, 58.18it/s, loss=0.0273]
Epoch 1 | Training loss: 0.0273, validation accuracy: 0.9828, validation loss: 0.0595
100%|██████████| 750/750 [00:12<00:00, 58.06it/s, loss=0.0261]
Epoch 2 | Training loss: 0.0261, validation accuracy: 0.9844, validation loss: 0.0547
100%|██████████| 750/750 [00:13<00:00, 57.61it/s, loss=0.0255]
Epoch 3 | Training loss: 0.0255, validation accuracy: 0.9837, validation loss: 0.0559
100%|██████████| 750/750 [00:13<00:00, 57.69it/s, loss=0.0241]
Epoch 4 | Training loss: 0.0241, validation accuracy: 0.9835, validation loss: 0.0582
100%|██████████| 750/750 [00:12<00:00, 57.75it/s, loss=0.0235]
Epoch 5 | Training loss: 0.0235, validation accuracy: 0.9842, validation loss: 0.0550
100%|██████████| 750/750 [00:13<00:00, 56.71it/s, loss=0.0227]
Epoch 6 | Training loss: 0.0227, validation accuracy: 0.9838, validation loss: 0.0572
100%|██████████| 750/750 [00:12<00:00, 57.82it/s, loss=0.0220]
Epoch 7 | Training loss: 0.0220, validation

In [44]:
# Score the current (non-accelerated) model on both splits.
# trainer.validation returns an (accuracy, loss) pair, so each logged tuple
# reads (accuracy, loss).
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.9957083333333333, 0.016038188162783625)
Valid: (0.98525, 0.05553407273973183)


In [45]:
%%time
# Time the acceleration step on its own.
# NOTE(review): presumably this computes extrapolated parameters from the
# iterates recorded by finish_epoch() -- confirm in nn_extrapolation.
optimizer.accelerate()

CPU times: user 126 ms, sys: 228 µs, total: 126 ms
Wall time: 45.4 ms


In [46]:
# Evaluate the accelerated parameters on a copy so that `model` itself keeps
# training from its own, unmodified weights.
model_acc = deepcopy(model)
# NOTE(review): presumably writes the optimizer's accelerated parameters into
# the copy; the argument is a list of parameter iterators -- confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters([model_acc.parameters()])
# Use the trainer's device (as done for `model`) instead of a bare .cuda(),
# which would pick the default CUDA device regardless of trainer.device.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
# Move the copy off the accelerator once it has been scored.
model_acc.cpu()
# Trailing None keeps the cell from echoing the module repr.
None

Train: (0.995875, 0.01646603027166566)
Valid: (0.9854166666666667, 0.0537477698623746)


In [47]:
# Continue training the same model/optimizer for another block of epochs.
# The logged epoch number restarts at 1 for this block.
epochs = 10

for epoch in range(epochs):
    # One full pass over the training loader, then signal the optimizer
    # that the epoch is complete.
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()

    # Evaluate on the held-out split; validation() yields (accuracy, loss).
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    # Adjacent f-string literals concatenate into the same single message.
    logger.log(
        "Epoch", epoch+1, "|",
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}",
    )

100%|██████████| 750/750 [00:12<00:00, 58.22it/s, loss=0.0192]
Epoch 1 | Training loss: 0.0192, validation accuracy: 0.9847, validation loss: 0.0552
100%|██████████| 750/750 [00:12<00:00, 57.98it/s, loss=0.0187]
Epoch 2 | Training loss: 0.0187, validation accuracy: 0.9840, validation loss: 0.0576
100%|██████████| 750/750 [00:13<00:00, 57.01it/s, loss=0.0180]
Epoch 3 | Training loss: 0.0180, validation accuracy: 0.9826, validation loss: 0.0613
100%|██████████| 750/750 [00:13<00:00, 55.94it/s, loss=0.0177]
Epoch 4 | Training loss: 0.0177, validation accuracy: 0.9827, validation loss: 0.0620
100%|██████████| 750/750 [00:13<00:00, 56.52it/s, loss=0.0170]
Epoch 5 | Training loss: 0.0170, validation accuracy: 0.9843, validation loss: 0.0571
100%|██████████| 750/750 [00:13<00:00, 55.37it/s, loss=0.0161]
Epoch 6 | Training loss: 0.0161, validation accuracy: 0.9827, validation loss: 0.0669
100%|██████████| 750/750 [00:13<00:00, 56.45it/s, loss=0.0163]
Epoch 7 | Training loss: 0.0163, validation

In [48]:
# Score the current (non-accelerated) model on both splits.
# trainer.validation returns an (accuracy, loss) pair, so each logged tuple
# reads (accuracy, loss).
train_score = trainer.validation(model, dl["train"])
valid_score = trainer.validation(model, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)

Train: (0.997, 0.011815481963159982)
Valid: (0.9845833333333334, 0.05835768977042365)


In [49]:
%%time
# Time the acceleration step on its own.
# NOTE(review): presumably this computes extrapolated parameters from the
# iterates recorded by finish_epoch() -- confirm in nn_extrapolation.
optimizer.accelerate()

CPU times: user 256 ms, sys: 7.18 ms, total: 263 ms
Wall time: 110 ms


In [50]:
# Evaluate the accelerated parameters on a copy so that `model` itself keeps
# training from its own, unmodified weights.
model_acc = deepcopy(model)
# NOTE(review): presumably writes the optimizer's accelerated parameters into
# the copy; the argument is a list of parameter iterators -- confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters([model_acc.parameters()])
# Use the trainer's device (as done for `model`) instead of a bare .cuda(),
# which would pick the default CUDA device regardless of trainer.device.
model_acc.to(trainer.device)
train_score = trainer.validation(model_acc, dl["train"])
valid_score = trainer.validation(model_acc, dl["valid"])
logger.log("Train:", train_score)
logger.log("Valid:", valid_score)
# Move the copy off the accelerator once it has been scored.
model_acc.cpu()
# Trailing None keeps the cell from echoing the module repr.
None

Train: (0.9973958333333334, 0.011270788766045977)
Valid: (0.9853333333333333, 0.05516895678074798)
