In [1]:
import sys
# Prepend the directory two levels up to the module search path.
# NOTE(review): presumably so the project-local `nn_extrapolation` package
# imported below resolves — confirm against the repository layout.
sys.path.insert(0, "../..")

In [2]:
import torch
from torch import nn
from torch.utils import data
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
import numpy as np
from copy import deepcopy

from nn_extrapolation import AcceleratedSGD

In [3]:
# Check for a usable CUDA device: later cells call model.cuda() unconditionally.
torch.cuda.is_available()

True

In [4]:
# Summed (not batch-averaged) NLL so that per-batch losses can be accumulated
# and divided by the exact number of samples afterwards.
val_loss_fn = nn.NLLLoss(reduction="sum")

def validation(model, loader):
    """Evaluate ``model`` on every batch yielded by ``loader``.

    Batches are moved to the device the model's parameters live on (CPU when
    the model has no parameters), so the function works for both CPU and CUDA
    models instead of requiring a GPU.

    Args:
        model: module called as ``model(x)``; its output is scored with
            ``nn.NLLLoss`` and ``argmax(1)``, i.e. it is expected to return
            per-class log-probabilities of shape (batch, classes).
        loader: iterable of ``(x, y)`` tensor pairs.

    Returns:
        ``(accuracy, mean_loss)`` — both 0-dim tensors on the model's device,
        each normalised by the total number of samples seen.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.

    Side effects:
        Leaves ``model`` in eval mode.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    ok = 0
    loss_sum = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            loss_sum += val_loss_fn(out, y)
            preds = out.argmax(1)
            ok += (y == preds).sum()
            total += len(y)
    return ok / total, loss_sum / total

def train_epoch(loss_log):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``train_loader``, ``optimizer`` and
    ``loss_fn``. Batches are moved to the device the model's parameters live
    on (CPU when the model has no parameters) rather than unconditionally to
    CUDA.

    Args:
        loss_log: list extended in place with the loss value(s) of every
            batch, as plain Python floats.
    """
    model.train()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        # flatten() turns a 0-dim loss into a one-element list and keeps
        # per-sample losses (if any) as separate entries.
        loss_log += loss.detach().flatten().cpu().tolist()
        loss.backward()
        optimizer.step()

In [5]:
# MNIST train/test splits, converted to tensors; fetched into ../../../MNIST
# when not already present (download=True).
train_ds = datasets.MNIST("../../../MNIST", download=True, train=True, transform=transforms.ToTensor())
test_ds = datasets.MNIST("../../../MNIST", download=True, train=False, transform=transforms.ToTensor())
# Hold out 20% of the training split for validation.
# NOTE(review): no generator/seed is passed to random_split in this cell, so
# the split may differ between runs.
valid_size = int(0.2 * len(train_ds))
train_ds, valid_ds = data.random_split(train_ds, [len(train_ds) - valid_size, valid_size])

# Only the training loader needs shuffling: the evaluation metrics are sums
# over all samples, so the order of validation/test batches is irrelevant.
train_loader = data.DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=2)
valid_loader = data.DataLoader(valid_ds, batch_size=64, shuffle=False, num_workers=2)
test_loader = data.DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=2)

## Levin t

In [6]:
# Fresh two-layer MLP: flatten the 28x28 input -> 512 hidden units (ReLU)
# -> 10 outputs passed through log-softmax.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28 * 28, out_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=10),
    nn.LogSoftmax(dim=-1),
)
# Move the network to the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [7]:
# Negative log-likelihood with the default reduction, used for training.
loss_fn = nn.NLLLoss()
# AcceleratedSGD from nn_extrapolation, configured for the "Levin:t" method.
# NOTE(review): 1e-3 is presumably the learning rate — confirm against the
# AcceleratedSGD signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    mode="epoch",
    method="Levin:t",
)

In [8]:
# Text log for this run; opening with mode "w" truncates any existing file.
# NOTE(review): the handle is never explicitly closed in the visible cells.
log_file = open(file="SGD-2l-Levin:t.txt", mode="w")

In [9]:
epochs = 30

for epoch in range(epochs):
    epoch_no = epoch + 1
    print("Epoch", epoch_no)

    # One pass over the training data, collecting the per-batch losses.
    loss_log = []
    train_epoch(loss_log)
    train_loss = np.mean(loss_log)
    print(f"Training loss: {train_loss:.4f}")

    # NOTE(review): presumably lets the optimizer record its end-of-epoch
    # state (mode="epoch") — confirm against nn_extrapolation.
    optimizer.finish_epoch()

    val_acc, val_loss = validation(model, valid_loader)
    print(f"Validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

    # The same figures as a single line in the run log.
    print(
        "Epoch",
        epoch_no,
        f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}",
        file=log_file,
        flush=True,
    )

Epoch 1
Training loss: 2.2107
Validation accuracy: 0.6197, validation loss: 2.1172
Epoch 2
Training loss: 2.0145
Validation accuracy: 0.7005, validation loss: 1.8979
Epoch 3
Training loss: 1.7735
Validation accuracy: 0.7360, validation loss: 1.6366
Epoch 4
Training loss: 1.5096
Validation accuracy: 0.7651, validation loss: 1.3766
Epoch 5
Training loss: 1.2696
Validation accuracy: 0.7934, validation loss: 1.1602
Epoch 6
Training loss: 1.0803
Validation accuracy: 0.8152, validation loss: 0.9967
Epoch 7
Training loss: 0.9401
Validation accuracy: 0.8288, validation loss: 0.8771
Epoch 8
Training loss: 0.8369
Validation accuracy: 0.8405, validation loss: 0.7887
Epoch 9
Training loss: 0.7597
Validation accuracy: 0.8482, validation loss: 0.7215
Epoch 10
Training loss: 0.7003
Validation accuracy: 0.8557, validation loss: 0.6691
Epoch 11
Training loss: 0.6535
Validation accuracy: 0.8597, validation loss: 0.6273
Epoch 12
Training loss: 0.6157
Validation accuracy: 0.8642, validation loss: 0.5931
E

In [10]:
# Score the current weights on both splits, then report the results first to
# the notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.8935, device='cuda:0'), tensor(0.3892, device='cuda:0'))
Valid: (tensor(0.8979, device='cuda:0'), tensor(0.3846, device='cuda:0'))


In [11]:
# NOTE(review): presumably applies the extrapolation selected via `method=`
# to the optimizer's recorded history — confirm against
# nn_extrapolation.AcceleratedSGD.accelerate.
optimizer.accelerate()

In [12]:
# NOTE(review): presumably writes the accelerated parameters into the model so
# the next validation pass scores them — confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters()
# Make sure the model is on the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [13]:
# Re-score the model on both splits and report the results first to the
# notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.8823, device='cuda:0'), tensor(0.4170, device='cuda:0'))
Valid: (tensor(0.8863, device='cuda:0'), tensor(0.4123, device='cuda:0'))


## Levin u

In [14]:
# Fresh two-layer MLP: flatten the 28x28 input -> 512 hidden units (ReLU)
# -> 10 outputs passed through log-softmax.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28 * 28, out_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=10),
    nn.LogSoftmax(dim=-1),
)
# Move the network to the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [15]:
# Negative log-likelihood with the default reduction, used for training.
loss_fn = nn.NLLLoss()
# AcceleratedSGD from nn_extrapolation, configured for the "Levin:u" method.
# NOTE(review): 1e-3 is presumably the learning rate — confirm against the
# AcceleratedSGD signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    mode="epoch",
    method="Levin:u",
)

In [16]:
# Text log for this run; opening with mode "w" truncates any existing file.
# NOTE(review): the handle is never explicitly closed in the visible cells.
log_file = open(file="SGD-2l-Levin:u.txt", mode="w")

In [17]:
epochs = 30

for epoch in range(epochs):
    epoch_no = epoch + 1
    print("Epoch", epoch_no)

    # One pass over the training data, collecting the per-batch losses.
    loss_log = []
    train_epoch(loss_log)
    train_loss = np.mean(loss_log)
    print(f"Training loss: {train_loss:.4f}")

    # NOTE(review): presumably lets the optimizer record its end-of-epoch
    # state (mode="epoch") — confirm against nn_extrapolation.
    optimizer.finish_epoch()

    val_acc, val_loss = validation(model, valid_loader)
    print(f"Validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

    # The same figures as a single line in the run log.
    print(
        "Epoch",
        epoch_no,
        f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}",
        file=log_file,
        flush=True,
    )

Epoch 1
Training loss: 2.2245
Validation accuracy: 0.6273, validation loss: 2.1346
Epoch 2
Training loss: 2.0353
Validation accuracy: 0.7180, validation loss: 1.9205
Epoch 3
Training loss: 1.7975
Validation accuracy: 0.7447, validation loss: 1.6603
Epoch 4
Training loss: 1.5322
Validation accuracy: 0.7665, validation loss: 1.3968
Epoch 5
Training loss: 1.2872
Validation accuracy: 0.7907, validation loss: 1.1744
Epoch 6
Training loss: 1.0923
Validation accuracy: 0.8130, validation loss: 1.0064
Epoch 7
Training loss: 0.9477
Validation accuracy: 0.8306, validation loss: 0.8832
Epoch 8
Training loss: 0.8418
Validation accuracy: 0.8381, validation loss: 0.7925
Epoch 9
Training loss: 0.7626
Validation accuracy: 0.8442, validation loss: 0.7237
Epoch 10
Training loss: 0.7020
Validation accuracy: 0.8537, validation loss: 0.6703
Epoch 11
Training loss: 0.6544
Validation accuracy: 0.8593, validation loss: 0.6278
Epoch 12
Training loss: 0.6160
Validation accuracy: 0.8648, validation loss: 0.5932
E

In [18]:
# Score the current weights on both splits, then report the results first to
# the notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.8941, device='cuda:0'), tensor(0.3884, device='cuda:0'))
Valid: (tensor(0.8998, device='cuda:0'), tensor(0.3835, device='cuda:0'))


In [19]:
# NOTE(review): presumably applies the extrapolation selected via `method=`
# to the optimizer's recorded history — confirm against
# nn_extrapolation.AcceleratedSGD.accelerate.
optimizer.accelerate()

In [20]:
# NOTE(review): presumably writes the accelerated parameters into the model so
# the next validation pass scores them — confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters()
# Make sure the model is on the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [21]:
# Re-score the model on both splits and report the results first to the
# notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.6390, device='cuda:0'), tensor(0.9743, device='cuda:0'))
Valid: (tensor(0.6349, device='cuda:0'), tensor(0.9715, device='cuda:0'))


## Levin v

In [22]:
# Fresh two-layer MLP: flatten the 28x28 input -> 512 hidden units (ReLU)
# -> 10 outputs passed through log-softmax.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28 * 28, out_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=10),
    nn.LogSoftmax(dim=-1),
)
# Move the network to the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [23]:
# Negative log-likelihood with the default reduction, used for training.
loss_fn = nn.NLLLoss()
# AcceleratedSGD from nn_extrapolation, configured for the "Levin:v" method.
# NOTE(review): 1e-3 is presumably the learning rate — confirm against the
# AcceleratedSGD signature.
optimizer = AcceleratedSGD(
    model.parameters(),
    1e-3,
    k=10,
    mode="epoch",
    method="Levin:v",
)

In [24]:
# Text log for this run; opening with mode "w" truncates any existing file.
# NOTE(review): the handle is never explicitly closed in the visible cells.
log_file = open(file="SGD-2l-Levin:v.txt", mode="w")

In [25]:
epochs = 30

for epoch in range(epochs):
    epoch_no = epoch + 1
    print("Epoch", epoch_no)

    # One pass over the training data, collecting the per-batch losses.
    loss_log = []
    train_epoch(loss_log)
    train_loss = np.mean(loss_log)
    print(f"Training loss: {train_loss:.4f}")

    # NOTE(review): presumably lets the optimizer record its end-of-epoch
    # state (mode="epoch") — confirm against nn_extrapolation.
    optimizer.finish_epoch()

    val_acc, val_loss = validation(model, valid_loader)
    print(f"Validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}")

    # The same figures as a single line in the run log.
    print(
        "Epoch",
        epoch_no,
        f"Training loss: {train_loss:.4f}, validation accuracy: {val_acc:.4f}, validation loss: {val_loss:.4f}",
        file=log_file,
        flush=True,
    )

Epoch 1
Training loss: 2.2161
Validation accuracy: 0.6587, validation loss: 2.1263
Epoch 2
Training loss: 2.0242
Validation accuracy: 0.7496, validation loss: 1.9101
Epoch 3
Training loss: 1.7842
Validation accuracy: 0.7591, validation loss: 1.6476
Epoch 4
Training loss: 1.5174
Validation accuracy: 0.7756, validation loss: 1.3837
Epoch 5
Training loss: 1.2733
Validation accuracy: 0.8013, validation loss: 1.1632
Epoch 6
Training loss: 1.0804
Validation accuracy: 0.8215, validation loss: 0.9966
Epoch 7
Training loss: 0.9373
Validation accuracy: 0.8359, validation loss: 0.8748
Epoch 8
Training loss: 0.8324
Validation accuracy: 0.8439, validation loss: 0.7847
Epoch 9
Training loss: 0.7539
Validation accuracy: 0.8517, validation loss: 0.7164
Epoch 10
Training loss: 0.6938
Validation accuracy: 0.8600, validation loss: 0.6635
Epoch 11
Training loss: 0.6467
Validation accuracy: 0.8637, validation loss: 0.6213
Epoch 12
Training loss: 0.6088
Validation accuracy: 0.8683, validation loss: 0.5871
E

In [26]:
# Score the current weights on both splits, then report the results first to
# the notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.8942, device='cuda:0'), tensor(0.3863, device='cuda:0'))
Valid: (tensor(0.9002, device='cuda:0'), tensor(0.3817, device='cuda:0'))


In [27]:
# NOTE(review): presumably applies the extrapolation selected via `method=`
# to the optimizer's recorded history — confirm against
# nn_extrapolation.AcceleratedSGD.accelerate.
optimizer.accelerate()

In [28]:
# NOTE(review): presumably writes the accelerated parameters into the model so
# the next validation pass scores them — confirm against
# AcceleratedSGD.store_parameters.
optimizer.store_parameters()
# Make sure the model is on the GPU; as the cell's last expression this also
# displays the module.
model.cuda()

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=-1)
)

In [29]:
# Re-score the model on both splits and report the results first to the
# notebook output and then to the run log.
train_score = validation(model, train_loader)
valid_score = validation(model, valid_loader)
score_rows = (("Train:", train_score), ("Valid:", valid_score))
for label, score in score_rows:
    print(label, score)
for label, score in score_rows:
    print(label, score, flush=True, file=log_file)

Train: (tensor(0.8910, device='cuda:0'), tensor(0.4065, device='cuda:0'))
Valid: (tensor(0.8961, device='cuda:0'), tensor(0.4015, device='cuda:0'))
