In [1]:
import sys
# Prepend "../.." to the module search path. The entry is relative, so it is
# resolved against the working directory the kernel runs in.
# NOTE(review): presumably this is where nn_extrapolation / nn_utils live — confirm.
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torchvision import models
from copy import deepcopy
import os

from nn_extrapolation import AcceleratedSGD
from nn_utils import *

In [3]:
# Pick the compute device. The notebook was written for the second GPU
# ("cuda:1"); keep that whenever it exists, otherwise fall back to the first
# GPU and finally to the CPU so the notebook still runs on smaller hosts.
# NOTE(review): assumes Trainer accepts any torch device string — confirm for "cpu".
if torch.cuda.is_available():
    trainer_device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
else:
    trainer_device = "cpu"

# Training uses the mean NLL per batch; validation uses the summed NLL.
# NOTE(review): presumably Trainer normalises the summed validation loss itself — confirm.
trainer = Trainer(
    device=trainer_device,
    loss_fn=nn.NLLLoss(reduction="mean"),
    val_loss_fn=nn.NLLLoss(reduction="sum"),
)

In [4]:
# Per-user dataset cache under /tmp. USER is not guaranteed to be set
# (containers, cron jobs, some IDE kernels), so fall back to a fixed name
# instead of raising KeyError. When USER is set the path is unchanged.
dataset_owner = os.environ.get("USER", "anonymous")

# CIFAR-10 loaders with a random affine augmentation (rotation up to 10 degrees,
# scale 0.9-1.1, translation up to 20% per axis) and a 20% validation split.
# Later cells index the result with "train" and "valid".
# NOTE(review): `transforms` is not imported explicitly in this notebook; it
# presumably arrives via `from nn_utils import *` — confirm.
dl = load_dataset(
    dataset="CIFAR10",
    root=os.path.join("/tmp", dataset_owner, "CIFAR"),
    augmentation=transforms.RandomAffine(10, scale=(0.9, 1.1), translate=(0.2, 0.2)),
    validation_split=0.2,
    batch_size=128,
    num_workers=10,
)

Files already downloaded and verified
Files already downloaded and verified


## All layers separate

In [5]:
# VGG-16 without pretrained weights; classifier[6] is replaced by a 10-output
# linear layer.
model = models.vgg16(pretrained=False)
model.classifier[6] = nn.Linear(in_features=4096, out_features=10)
# Append log-softmax as classifier module "7" so the outputs are
# log-probabilities, matching the NLLLoss objects given to the trainer.
model.classifier.add_module(name="7", module=nn.LogSoftmax(dim=-1))

model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
# Validation of the freshly built model before any training. The cell displays
# the return value, which later cells unpack as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1011, 2.3026913608551025)

In [7]:
# One parameter group per parameter tensor ("all layers separate").
groups = [dict(params=[weight]) for weight in model.parameters()]
optimizer = AcceleratedSGD(
    groups,
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
# All logger.log calls below go through this logger.
logger = Logger("vgg_log_augmentation-split.txt.no_resizing")

In [8]:
epochs = 30

# Per epoch: train on the training loader, call the optimizer's finish_epoch()
# hook (defined in nn_extrapolation), validate, then log a one-line summary.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:39<00:00,  7.93it/s, loss=2.1235]
Epoch 1 | Training loss: 2.1235, validation accuracy: 0.2703, validation loss: 1.9072
100%|██████████| 313/313 [00:40<00:00,  7.73it/s, loss=1.7648]
Epoch 2 | Training loss: 1.7648, validation accuracy: 0.4009, validation loss: 1.5813
100%|██████████| 313/313 [00:40<00:00,  7.72it/s, loss=1.5661]
Epoch 3 | Training loss: 1.5661, validation accuracy: 0.4442, validation loss: 1.4963
100%|██████████| 313/313 [00:40<00:00,  7.74it/s, loss=1.4134]
Epoch 4 | Training loss: 1.4134, validation accuracy: 0.5631, validation loss: 1.2067
100%|██████████| 313/313 [00:40<00:00,  7.77it/s, loss=1.2669]
Epoch 5 | Training loss: 1.2669, validation accuracy: 0.5854, validation loss: 1.1578
100%|██████████| 313/313 [00:40<00:00,  7.81it/s, loss=1.1422]
Epoch 6 | Training loss: 1.1422, validation accuracy: 0.6363, validation loss: 1.0416
100%|██████████| 313/313 [00:40<00:00,  7.82it/s, loss=1.0410]
Epoch 7 | Training loss: 1.0410, validation

In [9]:
# Score the trained model on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.89215, 0.31207365999221803)
Valid: (0.8367, 0.528479273223877)


In [10]:
# Work on an independent copy so `model` itself is not the target of
# store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# One single-parameter list per parameter, mirroring the per-parameter groups
# the optimizer was built with.
target_groups = [[param] for param in model_acc.parameters()]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [11]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9241, 0.2236853707075119)
Valid: (0.8637, 0.4217224886894226)


In [12]:
# Set the learning rate of every parameter group to 1e-3 for the next phase.
new_lr = 1e-3
for param_group in optimizer.param_groups:
    param_group["lr"] = new_lr

In [13]:
epochs = 30

# Second training phase on the same `model` and `optimizer`; the logged epoch
# counter starts again at 1.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=0.2217]
Epoch 1 | Training loss: 0.2217, validation accuracy: 0.8595, validation loss: 0.4645
100%|██████████| 313/313 [00:41<00:00,  7.57it/s, loss=0.1898]
Epoch 2 | Training loss: 0.1898, validation accuracy: 0.8629, validation loss: 0.4621
100%|██████████| 313/313 [00:41<00:00,  7.62it/s, loss=0.1766]
Epoch 3 | Training loss: 0.1766, validation accuracy: 0.8627, validation loss: 0.4764
100%|██████████| 313/313 [00:41<00:00,  7.63it/s, loss=0.1741]
Epoch 4 | Training loss: 0.1741, validation accuracy: 0.8643, validation loss: 0.4738
100%|██████████| 313/313 [00:40<00:00,  7.69it/s, loss=0.1664]
Epoch 5 | Training loss: 0.1664, validation accuracy: 0.8606, validation loss: 0.4858
100%|██████████| 313/313 [00:40<00:00,  7.67it/s, loss=0.1607]
Epoch 6 | Training loss: 0.1607, validation accuracy: 0.8598, validation loss: 0.4937
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=0.1550]
Epoch 7 | Training loss: 0.1550, validation

In [14]:
# Score the model after the second phase on both splits, then log both results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.966625, 0.09466175615787506)
Valid: (0.8623, 0.5578842914581299)


In [15]:
# Fresh independent copy of the current model; the previous model_acc is replaced.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# One single-parameter list per parameter, mirroring the per-parameter groups
# the optimizer was built with.
target_groups = [[param] for param in model_acc.parameters()]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [16]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.971025, 0.08556011869311332)
Valid: (0.8669, 0.5273814085006714)


## Linear only

In [17]:
# Start this experiment from a new VGG-16 without pretrained weights;
# classifier[6] is replaced by a 10-output linear layer.
model = models.vgg16(pretrained=False)
model.classifier[6] = nn.Linear(in_features=4096, out_features=10)
# Append log-softmax as classifier module "7" so the outputs are
# log-probabilities, matching the NLLLoss objects given to the trainer.
model.classifier.add_module(name="7", module=nn.LogSoftmax(dim=-1))

model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [18]:
# Validation of the freshly built model before any training. The cell displays
# the return value, which later cells unpack as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1008, 2.302828458404541)

In [19]:
# Two parameter groups: the convolutional feature extractor and the classifier.
# Only the conv group carries "method": None.
# NOTE(review): presumably this switches acceleration off for that group — confirm in AcceleratedSGD.
conv_group = dict(params=list(model.features.parameters()), method=None)
fc_group = dict(params=list(model.classifier.parameters()))
groups = [conv_group, fc_group]
optimizer = AcceleratedSGD(
    groups,
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
logger = Logger("vgg_log_augmentation-linear_only.txt.no_resizing")

In [20]:
epochs = 30

# Per epoch: train on the training loader, call the optimizer's finish_epoch()
# hook (defined in nn_extrapolation), validate, then log a one-line summary.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:39<00:00,  7.91it/s, loss=2.0662]
Epoch 1 | Training loss: 2.0662, validation accuracy: 0.2911, validation loss: 1.8024
100%|██████████| 313/313 [00:40<00:00,  7.64it/s, loss=1.7103]
Epoch 2 | Training loss: 1.7103, validation accuracy: 0.4021, validation loss: 1.5752
100%|██████████| 313/313 [00:40<00:00,  7.64it/s, loss=1.5356]
Epoch 3 | Training loss: 1.5356, validation accuracy: 0.4817, validation loss: 1.3911
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=1.3609]
Epoch 4 | Training loss: 1.3609, validation accuracy: 0.5973, validation loss: 1.1243
100%|██████████| 313/313 [00:40<00:00,  7.72it/s, loss=1.2414]
Epoch 5 | Training loss: 1.2414, validation accuracy: 0.6053, validation loss: 1.1064
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=1.1120]
Epoch 6 | Training loss: 1.1120, validation accuracy: 0.6475, validation loss: 0.9919
100%|██████████| 313/313 [00:40<00:00,  7.79it/s, loss=1.0314]
Epoch 7 | Training loss: 1.0314, validation

In [21]:
# Score the trained model on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.880025, 0.3454588752031326)
Valid: (0.8279, 0.5698312043190003)


In [22]:
# Work on an independent copy so `model` itself is not the target of
# store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# Two lists in the same order as the optimizer's groups: features first, then classifier.
target_groups = [list(model_acc.features.parameters()), list(model_acc.classifier.parameters())]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [23]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.879075, 0.3530905436992645)
Valid: (0.8269, 0.5610731469154357)


In [24]:
# Set the learning rate of every parameter group to 1e-3 for the next phase.
new_lr = 1e-3
for param_group in optimizer.param_groups:
    param_group["lr"] = new_lr

In [25]:
epochs = 30

# Second training phase on the same `model` and `optimizer`; the logged epoch
# counter starts again at 1.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:40<00:00,  7.73it/s, loss=0.2233]
Epoch 1 | Training loss: 0.2233, validation accuracy: 0.8661, validation loss: 0.4487
100%|██████████| 313/313 [00:41<00:00,  7.60it/s, loss=0.1982]
Epoch 2 | Training loss: 0.1982, validation accuracy: 0.8692, validation loss: 0.4406
100%|██████████| 313/313 [00:41<00:00,  7.58it/s, loss=0.1783]
Epoch 3 | Training loss: 0.1783, validation accuracy: 0.8713, validation loss: 0.4547
100%|██████████| 313/313 [00:41<00:00,  7.60it/s, loss=0.1771]
Epoch 4 | Training loss: 0.1771, validation accuracy: 0.8683, validation loss: 0.4518
100%|██████████| 313/313 [00:41<00:00,  7.62it/s, loss=0.1711]
Epoch 5 | Training loss: 0.1711, validation accuracy: 0.8714, validation loss: 0.4518
100%|██████████| 313/313 [00:40<00:00,  7.70it/s, loss=0.1635]
Epoch 6 | Training loss: 0.1635, validation accuracy: 0.8689, validation loss: 0.4634
100%|██████████| 313/313 [00:40<00:00,  7.71it/s, loss=0.1583]
Epoch 7 | Training loss: 0.1583, validation

In [26]:
# Score the model after the second phase on both splits, then log both results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.96905, 0.08890548278093338)
Valid: (0.877, 0.513082452583313)


In [27]:
# Fresh independent copy of the current model; the previous model_acc is replaced.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# Two lists in the same order as the optimizer's groups: features first, then classifier.
target_groups = [list(model_acc.features.parameters()), list(model_acc.classifier.parameters())]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [28]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.9692, 0.09143368145674467)
Valid: (0.8761, 0.5049447832107544)


## Linear and conv separate

In [29]:
# Start this experiment from a new VGG-16 without pretrained weights;
# classifier[6] is replaced by a 10-output linear layer.
model = models.vgg16(pretrained=False)
model.classifier[6] = nn.Linear(in_features=4096, out_features=10)
# Append log-softmax as classifier module "7" so the outputs are
# log-probabilities, matching the NLLLoss objects given to the trainer.
model.classifier.add_module(name="7", module=nn.LogSoftmax(dim=-1))

model.to(trainer.device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [30]:
# Validation of the freshly built model before any training. The cell displays
# the return value, which later cells unpack as (accuracy, loss).
trainer.validation(model, dl["valid"])

(0.1004, 2.3031337482452394)

In [31]:
# Two parameter groups, feature extractor and classifier, both left on the
# optimizer's default settings (no per-group overrides).
conv_group = dict(params=list(model.features.parameters()))
fc_group = dict(params=list(model.classifier.parameters()))
groups = [conv_group, fc_group]
optimizer = AcceleratedSGD(
    groups,
    1e-2,
    k=10,
    momentum=0.9,
    weight_decay=1e-5,
    lambda_=1e-8,
)
logger = Logger("vgg_log_augmentation-conv_linear_splits.txt.no_resizing")

In [32]:
epochs = 30

# Per epoch: train on the training loader, call the optimizer's finish_epoch()
# hook (defined in nn_extrapolation), validate, then log a one-line summary.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:40<00:00,  7.66it/s, loss=2.0948]
Epoch 1 | Training loss: 2.0948, validation accuracy: 0.2827, validation loss: 1.8539
100%|██████████| 313/313 [00:41<00:00,  7.51it/s, loss=1.7338]
Epoch 2 | Training loss: 1.7338, validation accuracy: 0.4168, validation loss: 1.5119
100%|██████████| 313/313 [00:42<00:00,  7.42it/s, loss=1.5295]
Epoch 3 | Training loss: 1.5295, validation accuracy: 0.4960, validation loss: 1.3821
100%|██████████| 313/313 [00:41<00:00,  7.51it/s, loss=1.3694]
Epoch 4 | Training loss: 1.3694, validation accuracy: 0.5525, validation loss: 1.2337
100%|██████████| 313/313 [00:41<00:00,  7.54it/s, loss=1.2270]
Epoch 5 | Training loss: 1.2270, validation accuracy: 0.6050, validation loss: 1.0831
100%|██████████| 313/313 [00:41<00:00,  7.53it/s, loss=1.1128]
Epoch 6 | Training loss: 1.1128, validation accuracy: 0.6604, validation loss: 0.9749
100%|██████████| 313/313 [00:41<00:00,  7.58it/s, loss=1.0203]
Epoch 7 | Training loss: 1.0203, validation

In [33]:
# Score the trained model on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.890925, 0.30734144163131716)
Valid: (0.8373, 0.5214660457611084)


In [34]:
# Work on an independent copy so `model` itself is not the target of
# store_parameters below.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# Two lists in the same order as the optimizer's groups: features first, then classifier.
target_groups = [list(model_acc.features.parameters()), list(model_acc.classifier.parameters())]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [35]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.92505, 0.22132562918663026)
Valid: (0.8647, 0.4144107507705688)


In [36]:
# Set the learning rate of every parameter group to 1e-3 for the next phase.
new_lr = 1e-3
for param_group in optimizer.param_groups:
    param_group["lr"] = new_lr

In [37]:
epochs = 30

# Second training phase on the same `model` and `optimizer`; the logged epoch
# counter starts again at 1.
for epoch_number in range(1, epochs + 1):
    train_loss = trainer.train_epoch(model, optimizer, dl["train"])
    optimizer.finish_epoch()
    val_acc, val_loss = trainer.validation(model, dl["valid"])
    summary = f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}"
    logger.log("Epoch", epoch_number, "|", summary)

100%|██████████| 313/313 [00:41<00:00,  7.52it/s, loss=0.2177]
Epoch 1 | Training loss: 0.2177, validation accuracy: 0.8616, validation loss: 0.4688
100%|██████████| 313/313 [00:42<00:00,  7.42it/s, loss=0.1871]
Epoch 2 | Training loss: 0.1871, validation accuracy: 0.8646, validation loss: 0.4621
100%|██████████| 313/313 [00:41<00:00,  7.52it/s, loss=0.1800]
Epoch 3 | Training loss: 0.1800, validation accuracy: 0.8666, validation loss: 0.4637
100%|██████████| 313/313 [00:41<00:00,  7.48it/s, loss=0.1686]
Epoch 4 | Training loss: 0.1686, validation accuracy: 0.8666, validation loss: 0.4665
100%|██████████| 313/313 [00:41<00:00,  7.54it/s, loss=0.1611]
Epoch 5 | Training loss: 0.1611, validation accuracy: 0.8673, validation loss: 0.4675
100%|██████████| 313/313 [00:41<00:00,  7.55it/s, loss=0.1553]
Epoch 6 | Training loss: 0.1553, validation accuracy: 0.8696, validation loss: 0.4717
100%|██████████| 313/313 [00:41<00:00,  7.52it/s, loss=0.1507]
Epoch 7 | Training loss: 0.1507, validation

In [38]:
# Score the model after the second phase on both splits, then log both results.
train_score, valid_score = (
    trainer.validation(model, dl["train"]),
    trainer.validation(model, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.968475, 0.09036123336553574)
Valid: (0.8692, 0.5477062431335449)


In [39]:
# Fresh independent copy of the current model; the previous model_acc is replaced.
model_acc = deepcopy(model)
# NOTE(review): accelerate() presumably computes the extrapolated parameters
# from the optimizer's stored history — confirm in nn_extrapolation.
optimizer.accelerate()
# Two lists in the same order as the optimizer's groups: features first, then classifier.
target_groups = [list(model_acc.features.parameters()), list(model_acc.classifier.parameters())]
# NOTE(review): presumably writes the accelerated values into model_acc's
# parameters — confirm against AcceleratedSGD.store_parameters.
optimizer.store_parameters(target_groups)

In [40]:
# Module.to returns the module itself, so rebinding keeps the same object.
model_acc = model_acc.to(trainer.device)
# Score the accelerated copy on both splits first, then log the two results.
train_score, valid_score = (
    trainer.validation(model_acc, dl["train"]),
    trainer.validation(model_acc, dl["valid"]),
)
for label, score in (("Train:", train_score), ("Valid:", valid_score)):
    logger.log(label, score)

Train: (0.97175, 0.08393800009787082)
Valid: (0.872, 0.5277702535629273)


In [41]:
# NOTE(review): bare `exit` left as the final cell — presumably meant to shut
# the kernel down (and free the GPU) once the run finishes; confirm it is
# intentional, since it also ends the session under "Run All".
exit