In [1]:
# Put the directory two levels above the working directory at the front of
# the module search path.
# NOTE(review): presumably that is where nn_extrapolation and nn_utils live — confirm.
import sys
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
import numpy as np
from copy import deepcopy

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Confirm that a CUDA device is visible before any model is moved to the GPU.
torch.cuda.is_available()

True

In [4]:
# Shared trainer for every experiment in this notebook.
# The device is picked at run time: the GPU when one is visible, the CPU
# otherwise, instead of hard-coding a device the machine may not have.
# Training uses the batch-mean NLL loss; validation uses the summed NLL loss.
# NOTE(review): the summed validation loss is presumably normalised inside
# Trainer.validation — confirm in nn_utils.
trainer = Trainer(
    device="cuda" if torch.cuda.is_available() else "cpu",
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [5]:
# Data loaders for MNIST, read from a local copy (download disabled), with a
# validation_split of 0.2 and mini-batches of 64. `dl` is indexed by split
# name ("train" / "valid") in the cells below.
mnist_options = dict(
    dataset="mnist",
    root="../../../MNIST",
    download=False,
    validation_split=0.2,
    batch_size=64,
    num_workers=2,
)
dl = load_dataset(**mnist_options)

In [6]:
def make_model():
    """Build a fresh, randomly initialised CNN classifier.

    The network takes single-channel images and ends in ``LogSoftmax``, so it
    emits log-probabilities over 10 classes. The first ``Linear`` layer
    expects ``4*4*64`` features, which is what the unpadded 3x3 convolutions
    and the two 2x2 poolings produce for 28x28 inputs.

    Returns:
        nn.Sequential: 15 layers (indices 0-14) in the order assembled below.
    """
    def conv_relu(in_channels, out_channels):
        # Unpadded 3x3 convolution followed by its activation.
        return [nn.Conv2d(in_channels, out_channels, 3), nn.ReLU()]

    feature_layers = [
        *conv_relu(1, 32),
        *conv_relu(32, 32),
        nn.MaxPool2d(2),
        *conv_relu(32, 64),
        *conv_relu(64, 64),
        nn.MaxPool2d(2),
    ]
    classifier_layers = [
        nn.Flatten(),
        nn.Linear(4*4*64, 128),
        nn.ReLU(),
        nn.Linear(128, 10),
        nn.LogSoftmax(-1),
    ]
    return nn.Sequential(*feature_layers, *classifier_layers)

## Epoch

In [7]:
# Instantiate the network and move it onto the trainer's device. The bare
# name on the last line makes the notebook print the layer summary.
model = make_model().to(trainer.device)
model

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (6): ReLU()
  (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (8): ReLU()
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Flatten(start_dim=1, end_dim=-1)
  (11): Linear(in_features=1024, out_features=128, bias=True)
  (12): ReLU()
  (13): Linear(in_features=128, out_features=10, bias=True)
  (14): LogSoftmax(dim=-1)
)

In [8]:
# Optimizer for the plain "epoch" mode run, plus the logger for this run.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch",
)
logger = Logger("SGD_momentum-early_stopping.txt")

In [9]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 53.99it/s, loss=2.2969]
Epoch 1 | Training loss: 2.2969, validation accuracy: 0.1784, validation loss: 2.2900
100%|██████████| 750/750 [00:13<00:00, 56.42it/s, loss=2.2453]
Epoch 2 | Training loss: 2.2453, validation accuracy: 0.6081, validation loss: 2.0571
100%|██████████| 750/750 [00:13<00:00, 53.91it/s, loss=0.8322]
Epoch 3 | Training loss: 0.8322, validation accuracy: 0.8721, validation loss: 0.4183
100%|██████████| 750/750 [00:13<00:00, 56.60it/s, loss=0.3828]
Epoch 4 | Training loss: 0.3828, validation accuracy: 0.8999, validation loss: 0.3218
100%|██████████| 750/750 [00:15<00:00, 47.66it/s, loss=0.2831]
Epoch 5 | Training loss: 0.2831, validation accuracy: 0.9277, validation loss: 0.2429
100%|██████████| 750/750 [00:15<00:00, 49.28it/s, loss=0.2190]
Epoch 6 | Training loss: 0.2190, validation accuracy: 0.9476, validation loss: 0.1788
100%|██████████| 750/750 [00:13<00:00, 55.68it/s, loss=0.1778]
Epoch 7 | Training loss: 0.1778, validation

In [10]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9950416666666667, 0.017324160625071576)
Valid: (0.9835833333333334, 0.058141735661192795)


In [11]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the parameter history the
# optimizer kept (k=10) — confirm in nn_extrapolation.
optimizer.accelerate()

In [12]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(), matching how the model was placed when it was created. The
# trailing semicolon keeps the notebook from printing the module summary.
model.to(trainer.device);

In [13]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9957291666666667, 0.016117793002359878)
Valid: (0.9838333333333333, 0.05537520078022499)


## Epoch average

In [14]:
# Fresh network for the "epoch_avg" run. It is placed on the trainer's device
# (as the first run does) instead of through a hard-coded .cuda(), so model
# and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
)
logger = Logger("SGD_momentum-avg-early_stopping.txt")

In [15]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 56.48it/s, loss=2.2899]
Epoch 1 | Training loss: 2.2899, validation accuracy: 0.2827, validation loss: 2.2619
100%|██████████| 750/750 [00:12<00:00, 58.20it/s, loss=1.5885]
Epoch 2 | Training loss: 1.5885, validation accuracy: 0.8377, validation loss: 0.5387
100%|██████████| 750/750 [00:12<00:00, 58.04it/s, loss=0.4524]
Epoch 3 | Training loss: 0.4524, validation accuracy: 0.8868, validation loss: 0.3528
100%|██████████| 750/750 [00:12<00:00, 58.25it/s, loss=0.3197]
Epoch 4 | Training loss: 0.3197, validation accuracy: 0.9276, validation loss: 0.2424
100%|██████████| 750/750 [00:13<00:00, 57.46it/s, loss=0.2461]
Epoch 5 | Training loss: 0.2461, validation accuracy: 0.9359, validation loss: 0.2142
100%|██████████| 750/750 [00:13<00:00, 54.11it/s, loss=0.2004]
Epoch 6 | Training loss: 0.2004, validation accuracy: 0.9508, validation loss: 0.1683
100%|██████████| 750/750 [00:13<00:00, 57.08it/s, loss=0.1650]
Epoch 7 | Training loss: 0.1650, validation

In [16]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9936666666666667, 0.01943125759270818)
Valid: (0.9841666666666666, 0.056276748875854536)


In [17]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [18]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [19]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9914583333333333, 0.028339443065226077)
Valid: (0.9831666666666666, 0.056663194953463975)


## Epoch average, with span = 100

In [7]:
# Fresh network for the "epoch_avg" run with an averaging span of 100.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (100 + 1),  # alpha = 2 / (span + 1) with span = 100
)
logger = Logger("SGD_momentum-avg_span_100-early_stopping.txt")

In [8]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:14<00:00, 53.57it/s, loss=2.2937]
Epoch 1 | Training loss: 2.2937, validation accuracy: 0.2708, validation loss: 2.2780
100%|██████████| 750/750 [00:15<00:00, 48.73it/s, loss=1.9645]
Epoch 2 | Training loss: 1.9645, validation accuracy: 0.7950, validation loss: 0.7574
100%|██████████| 750/750 [00:16<00:00, 45.69it/s, loss=0.5181]
Epoch 3 | Training loss: 0.5181, validation accuracy: 0.8860, validation loss: 0.3746
100%|██████████| 750/750 [00:16<00:00, 44.13it/s, loss=0.3462]
Epoch 4 | Training loss: 0.3462, validation accuracy: 0.9165, validation loss: 0.2812
100%|██████████| 750/750 [00:16<00:00, 46.22it/s, loss=0.2619]
Epoch 5 | Training loss: 0.2619, validation accuracy: 0.9308, validation loss: 0.2287
100%|██████████| 750/750 [00:15<00:00, 47.46it/s, loss=0.2093]
Epoch 6 | Training loss: 0.2093, validation accuracy: 0.9463, validation loss: 0.1798
100%|██████████| 750/750 [00:16<00:00, 46.15it/s, loss=0.1720]
Epoch 7 | Training loss: 0.1720, validation

In [9]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9903958333333334, 0.03071345314880212)
Valid: (0.9815833333333334, 0.06359571423769618)


In [10]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [11]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [12]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9930833333333333, 0.02502107828987452)
Valid: (0.9835833333333334, 0.05615078301091368)


## Epoch average, with span = 50

In [13]:
# Fresh network for the "epoch_avg" run with an averaging span of 50.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (50 + 1),  # alpha = 2 / (span + 1) with span = 50
)
logger = Logger("SGD_momentum-avg_span_50-early_stopping.txt")

In [14]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 57.28it/s, loss=2.2874]
Epoch 1 | Training loss: 2.2874, validation accuracy: 0.1900, validation loss: 2.2533
100%|██████████| 750/750 [00:13<00:00, 56.53it/s, loss=1.4535]
Epoch 2 | Training loss: 1.4535, validation accuracy: 0.8669, validation loss: 0.4567
100%|██████████| 750/750 [00:13<00:00, 56.69it/s, loss=0.3980]
Epoch 3 | Training loss: 0.3980, validation accuracy: 0.9013, validation loss: 0.3134
100%|██████████| 750/750 [00:13<00:00, 57.25it/s, loss=0.2835]
Epoch 4 | Training loss: 0.2835, validation accuracy: 0.9335, validation loss: 0.2258
100%|██████████| 750/750 [00:12<00:00, 58.00it/s, loss=0.2180]
Epoch 5 | Training loss: 0.2180, validation accuracy: 0.9447, validation loss: 0.1834
100%|██████████| 750/750 [00:12<00:00, 58.11it/s, loss=0.1757]
Epoch 6 | Training loss: 0.1757, validation accuracy: 0.9534, validation loss: 0.1566
100%|██████████| 750/750 [00:13<00:00, 56.82it/s, loss=0.1474]
Epoch 7 | Training loss: 0.1474, validation

In [15]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9914166666666666, 0.02713526949007064)
Valid: (0.98225, 0.06181334049627185)


In [16]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [17]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [18]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9937291666666667, 0.02244178114986668)
Valid: (0.9841666666666666, 0.056203421553752075)


## Epoch average, with span = 20

In [20]:
# Fresh network for the "epoch_avg" run with an averaging span of 20.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (20 + 1),  # alpha = 2 / (span + 1) with span = 20
)
logger = Logger("SGD_momentum-avg_span_20-early_stopping.txt")

In [21]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 56.87it/s, loss=2.2953]
Epoch 1 | Training loss: 2.2953, validation accuracy: 0.2700, validation loss: 2.2825
100%|██████████| 750/750 [00:13<00:00, 57.16it/s, loss=2.0959]
Epoch 2 | Training loss: 2.0959, validation accuracy: 0.7572, validation loss: 1.0105
100%|██████████| 750/750 [00:13<00:00, 57.40it/s, loss=0.5191]
Epoch 3 | Training loss: 0.5191, validation accuracy: 0.8990, validation loss: 0.3467
100%|██████████| 750/750 [00:13<00:00, 57.31it/s, loss=0.3205]
Epoch 4 | Training loss: 0.3205, validation accuracy: 0.9277, validation loss: 0.2492
100%|██████████| 750/750 [00:13<00:00, 57.06it/s, loss=0.2356]
Epoch 5 | Training loss: 0.2356, validation accuracy: 0.9370, validation loss: 0.2145
100%|██████████| 750/750 [00:13<00:00, 56.98it/s, loss=0.1871]
Epoch 6 | Training loss: 0.1871, validation accuracy: 0.9540, validation loss: 0.1615
100%|██████████| 750/750 [00:13<00:00, 57.11it/s, loss=0.1545]
Epoch 7 | Training loss: 0.1545, validation

In [22]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9915833333333334, 0.02811048800110196)
Valid: (0.9825, 0.060424668717353296)


In [23]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [24]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [25]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9926875, 0.026050740072503686)
Valid: (0.984, 0.05607895925998067)


## Epoch average, with span = 15

In [26]:
# Fresh network for the "epoch_avg" run with an averaging span of 15.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (15 + 1),  # alpha = 2 / (span + 1) with span = 15
)
logger = Logger("SGD_momentum-avg_span_15-early_stopping.txt")

In [27]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:12<00:00, 57.84it/s, loss=2.2969]
Epoch 1 | Training loss: 2.2969, validation accuracy: 0.1794, validation loss: 2.2844
100%|██████████| 750/750 [00:12<00:00, 57.74it/s, loss=2.1187]
Epoch 2 | Training loss: 2.1187, validation accuracy: 0.7600, validation loss: 1.1245
100%|██████████| 750/750 [00:12<00:00, 57.92it/s, loss=0.5478]
Epoch 3 | Training loss: 0.5478, validation accuracy: 0.8782, validation loss: 0.3839
100%|██████████| 750/750 [00:12<00:00, 57.92it/s, loss=0.3430]
Epoch 4 | Training loss: 0.3430, validation accuracy: 0.9225, validation loss: 0.2615
100%|██████████| 750/750 [00:12<00:00, 57.74it/s, loss=0.2544]
Epoch 5 | Training loss: 0.2544, validation accuracy: 0.9316, validation loss: 0.2234
100%|██████████| 750/750 [00:12<00:00, 57.78it/s, loss=0.1990]
Epoch 6 | Training loss: 0.1990, validation accuracy: 0.9495, validation loss: 0.1675
100%|██████████| 750/750 [00:12<00:00, 57.72it/s, loss=0.1618]
Epoch 7 | Training loss: 0.1618, validation

In [28]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.992875, 0.02253287366971684)
Valid: (0.9830833333333333, 0.059153521404834465)


In [29]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [30]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [31]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9949583333333333, 0.018900427822217656)
Valid: (0.985, 0.05393957465126489)


## Epoch average, with span = 10

In [32]:
# Fresh network for the "epoch_avg" run with an averaging span of 10.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (10 + 1),  # alpha = 2 / (span + 1) with span = 10
)
logger = Logger("SGD_momentum-avg_span_10-early_stopping.txt")

In [33]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 56.56it/s, loss=2.2958]
Epoch 1 | Training loss: 2.2958, validation accuracy: 0.1129, validation loss: 2.2836
100%|██████████| 750/750 [00:14<00:00, 52.89it/s, loss=2.0953]
Epoch 2 | Training loss: 2.0953, validation accuracy: 0.7692, validation loss: 1.0024
100%|██████████| 750/750 [00:13<00:00, 56.26it/s, loss=0.5282]
Epoch 3 | Training loss: 0.5282, validation accuracy: 0.8942, validation loss: 0.3493
100%|██████████| 750/750 [00:13<00:00, 54.32it/s, loss=0.3333]
Epoch 4 | Training loss: 0.3333, validation accuracy: 0.9143, validation loss: 0.2766
100%|██████████| 750/750 [00:13<00:00, 55.73it/s, loss=0.2460]
Epoch 5 | Training loss: 0.2460, validation accuracy: 0.9378, validation loss: 0.2060
100%|██████████| 750/750 [00:13<00:00, 56.15it/s, loss=0.1888]
Epoch 6 | Training loss: 0.1888, validation accuracy: 0.9513, validation loss: 0.1628
100%|██████████| 750/750 [00:13<00:00, 57.18it/s, loss=0.1546]
Epoch 7 | Training loss: 0.1546, validation

In [34]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9949166666666667, 0.016756944798340556)
Valid: (0.9835833333333334, 0.06118708448329319)


In [35]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [36]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [37]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9969583333333333, 0.012952309324813541)
Valid: (0.9854166666666667, 0.05370495984070779)


## Epoch average, with span = 5

In [38]:
# Fresh network for the "epoch_avg" run with an averaging span of 5.
# It is placed on the trainer's device instead of through a hard-coded
# .cuda(), so model and trainer can never end up on different devices.
model = make_model().to(trainer.device)
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    momentum=0.5,
    weight_decay=1e-5,
    mode="epoch_avg",
    avg_alpha=2 / (5 + 1),  # alpha = 2 / (span + 1) with span = 5
)
logger = Logger("SGD_momentum-avg_span_5-early_stopping.txt")

In [39]:
# Train for at most `epochs` epochs; leave the loop as soon as the
# EarlyStopping(5) helper says so, based on the validation loss.
epochs = 1000
early_stopping = EarlyStopping(5)

for epoch in range(epochs):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()  # notify the optimizer that the epoch ended
    val_acc, val_loss = trainer.validation(model, dl["valid"])

    summary = (
        f"Training loss: {train_loss:.4f}, "
        f"validation accuracy: {val_acc:.4f}, "
        f"validation loss: {val_loss:.4f}"
    )
    logger.log("Epoch", epoch + 1, "|", summary)

    if early_stopping.should_stop(val_loss):
        break

100%|██████████| 750/750 [00:13<00:00, 56.04it/s, loss=2.2901]
Epoch 1 | Training loss: 2.2901, validation accuracy: 0.2288, validation loss: 2.2640
100%|██████████| 750/750 [00:13<00:00, 56.45it/s, loss=1.6426]
Epoch 2 | Training loss: 1.6426, validation accuracy: 0.8390, validation loss: 0.5354
100%|██████████| 750/750 [00:13<00:00, 56.13it/s, loss=0.4382]
Epoch 3 | Training loss: 0.4382, validation accuracy: 0.9028, validation loss: 0.3304
100%|██████████| 750/750 [00:13<00:00, 55.72it/s, loss=0.3186]
Epoch 4 | Training loss: 0.3186, validation accuracy: 0.9255, validation loss: 0.2485
100%|██████████| 750/750 [00:13<00:00, 56.54it/s, loss=0.2499]
Epoch 5 | Training loss: 0.2499, validation accuracy: 0.9410, validation loss: 0.2006
100%|██████████| 750/750 [00:13<00:00, 56.70it/s, loss=0.2063]
Epoch 6 | Training loss: 0.2063, validation accuracy: 0.9404, validation loss: 0.1931
100%|██████████| 750/750 [00:13<00:00, 56.43it/s, loss=0.1750]
Epoch 7 | Training loss: 0.1750, validation

In [40]:
# Score the trained model on both splits; each score is the
# (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9958541666666667, 0.014195251921541058)
Valid: (0.9849166666666667, 0.057167659000018224)


In [41]:
# Invoke AcceleratedSGD's acceleration step now that training has stopped.
# NOTE(review): presumably extrapolates from the averaged parameter history
# kept in "epoch_avg" mode — confirm in nn_extrapolation.
optimizer.accelerate()

In [42]:
# NOTE(review): store_parameters() presumably writes the accelerated
# parameters into the model — confirm in nn_extrapolation.
optimizer.store_parameters()
# Move the model back onto the trainer's device rather than a hard-coded
# .cuda(). The trailing semicolon keeps the notebook from printing the
# module summary.
model.to(trainer.device);

In [43]:
# Re-score both splits after accelerate() / store_parameters() have run; each
# score is the (accuracy, loss) pair returned by trainer.validation.
train_score, valid_score = (
    trainer.validation(model, dl[split]) for split in ("train", "valid")
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.997125, 0.012467049477835342)
Valid: (0.9853333333333333, 0.052354587704777564)
