In [1]:
from cooper.formulations import Lagrangian

from src.optimizers.LagrangianTrainer import LagrangianTrainer
from src.optimizers.SimpleTrainer import SimpleTrainer
%load_ext autoreload
%autoreload 2
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import torch
from src.utils import dataset
from src.optimizers.HypercubeTrainer import HypercubeTrainer
from src.utils.evaluation import evaluate_accuracy
from src.cert import Safebox

In [3]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [4]:
def get_model(seed=0, output_dim=10, device=None):
    """Build a small two-convolution CNN for single-channel images.

    Layers: Conv2d(1 -> 8) -> ReLU -> Conv2d(8 -> 1) -> ReLU -> Flatten ->
    Linear(576 -> output_dim). Each 5x5 convolution with padding 1 and
    stride 1 shrinks height and width by 2, so the 576 input features of the
    linear layer match square 28x28 inputs (24 * 24 * 1 = 576).

    Args:
        seed: Passed to ``torch.manual_seed`` before the layers are created,
            so the initial weights are reproducible. This reseeds PyTorch's
            global random number generator.
        output_dim: Number of outputs of the final linear layer.
        device: Device the model is moved to. ``None`` (the default) keeps
            the previous behaviour of using the notebook-level ``DEVICE``.

    Returns:
        The ``torch.nn.Sequential`` model on the selected device.
    """
    torch.manual_seed(seed)
    # Only fall back to the global when the caller did not choose a device.
    target_device = DEVICE if device is None else device
    layers = [
        torch.nn.Conv2d(1, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 1, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(576, output_dim),
    ]
    return torch.nn.Sequential(*layers).to(target_device)
def get_model_cifar10(seed=0, output_dim=10, device=None):
    """Build a small two-convolution CNN for three-channel images.

    Layers: Conv2d(3 -> 8) -> ReLU -> Conv2d(8 -> 3) -> ReLU -> Flatten ->
    Linear(2352 -> output_dim). Each 5x5 convolution with padding 1 and
    stride 1 shrinks height and width by 2, so the 2352 input features of the
    linear layer match square 32x32 inputs (28 * 28 * 3 = 2352), which is the
    CIFAR-10 image size suggested by the function name.

    Args:
        seed: Passed to ``torch.manual_seed`` before the layers are created,
            so the initial weights are reproducible. This reseeds PyTorch's
            global random number generator.
        output_dim: Number of outputs of the final linear layer.
        device: Device the model is moved to. ``None`` (the default) keeps
            the previous behaviour of using the notebook-level ``DEVICE``.

    Returns:
        The ``torch.nn.Sequential`` model on the selected device.
    """
    torch.manual_seed(seed)
    # Only fall back to the global when the caller did not choose a device.
    target_device = DEVICE if device is None else device
    layers = [
        torch.nn.Conv2d(3, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 3, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(2352, output_dim),
    ]
    return torch.nn.Sequential(*layers).to(target_device)



In [5]:
from src.utils.dataset import reduce_dataset

# Load the Fashion-MNIST splits, then pass the training split through
# reduce_dataset with num_samples=300 (presumably keeping 300 samples —
# verify in src.utils.dataset). The validation split is used as returned.
train_dataset, val_dataset = dataset.get_fashion_mnist_dataset()
train_dataset = reduce_dataset(
    train_dataset,
    num_samples=300,
)

100%|██████████| 26.4M/26.4M [00:01<00:00, 22.4MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 684kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 13.4MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 541kB/s]


In [6]:
# Baseline run: train a fresh CNN with SimpleTrainer and keep whatever
# `train` returns as the model used in the cells below.
simple_model = get_model()
trainer = SimpleTrainer(simple_model, device=DEVICE)
simple_model = trainer.train(
    train_dataset,
    val_dataset,
    # NOTE(review): looks like a target loss used as a stopping threshold — confirm in SimpleTrainer.
    loss_obj=0.000000000000001,
    max_iters=3000,
    batch_size=64,
    lr=1e-4,
)

100%|██████████| 3000/3000 [01:24<00:00, 35.60it/s, loss=0.115, val_acc=0.719] 

----------  Training completed with loss  0 ----------





In [33]:
# Report the baseline model's accuracy on the validation split.
print(
    "Accuracy Simple Training",
    evaluate_accuracy(val_dataset, simple_model, num_samples=10000, device=DEVICE),
)

Accuracy Simple Training 0.7263000011444092


In [34]:
# Train a fresh CNN with HypercubeTrainer, using the same training settings
# as the baseline run.
model = get_model(output_dim=10)
optimizer = HypercubeTrainer(model, device=DEVICE)
# start with a small volume at first
optimizer.set_volume_constrain(1e-5 * 5)
optimizer.train(
    train_dataset,
    val_dataset,
    loss_obj=0.000000000000001,
    max_iters=3000,
    batch_size=64,
    lr=1e-4,
)

100%|██████████| 3000/3000 [03:05<00:00, 16.21it/s, loss=0.0414, min_val_acc=0.609]

----------  Training completed with loss  0 ----------





In [35]:
# Convert the trainer's result with Safebox.bmodelToModel and evaluate it on
# the validation split. Note that `model` is rebound to the converted model here.
model = Safebox.bmodelToModel(optimizer.result())
print(
    "Accuracy ",
    evaluate_accuracy(val_dataset, model, num_samples=10000),
)

Accuracy  0.7210000157356262


##### Lagrangian experiments

In [37]:
# Same experiment as the HypercubeTrainer cell, but with LagrangianTrainer
# and everything pinned to the CPU.
# NOTE(review): in the recorded output the loss becomes NaN on the third
# printed step and the run ends in a KeyboardInterrupt — investigate the
# trainer before relying on this cell.
model = get_model(output_dim=10).to('cpu')
optimizer = LagrangianTrainer(model, device='cpu')
# start with a small volume at first
optimizer.set_volume_constrain(1e-5 * 5)
optimizer.train(
    train_dataset,
    val_dataset,
    loss_obj=0.000000000000001,
    max_iters=3000,
    batch_size=64,
    lr=1e-4,
)

  0%|          | 3/3000 [00:00<01:51, 26.94it/s, loss=nan, min_val_acc=0.188] 

tensor(2.4332, grad_fn=<NllLossBackward0>)
tensor(2.2188, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  0%|          | 9/3000 [00:00<01:53, 26.24it/s, loss=nan, min_val_acc=0.109] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  0%|          | 15/3000 [00:00<01:55, 25.82it/s, loss=nan, min_val_acc=0.0781]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  1%|          | 21/3000 [00:00<01:53, 26.15it/s, loss=nan, min_val_acc=0.172] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  1%|          | 27/3000 [00:01<01:53, 26.14it/s, loss=nan, min_val_acc=0.0938]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  1%|          | 33/3000 [00:01<01:54, 25.83it/s, loss=nan, min_val_acc=0.109] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  1%|▏         | 39/3000 [00:01<01:52, 26.36it/s, loss=nan, min_val_acc=0.0469]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▏         | 45/3000 [00:01<01:51, 26.60it/s, loss=nan, min_val_acc=0.109] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▏         | 51/3000 [00:02<01:49, 26.89it/s, loss=nan, min_val_acc=0.109] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▏         | 57/3000 [00:02<01:52, 26.19it/s, loss=nan, min_val_acc=0.0469]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▏         | 63/3000 [00:02<01:53, 25.81it/s, loss=nan, min_val_acc=0.0625]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▏         | 69/3000 [00:02<01:53, 25.77it/s, loss=nan, min_val_acc=0.109] 

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  2%|▎         | 75/3000 [00:02<01:53, 25.74it/s, loss=nan, min_val_acc=0.0625]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  3%|▎         | 81/3000 [00:03<01:52, 25.88it/s, loss=nan, min_val_acc=0.0938]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  3%|▎         | 87/3000 [00:03<01:52, 25.93it/s, loss=nan, min_val_acc=0.0781]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  3%|▎         | 93/3000 [00:03<01:51, 25.97it/s, loss=nan, min_val_acc=0.0781]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  3%|▎         | 99/3000 [00:03<01:54, 25.25it/s, loss=nan, min_val_acc=0.0938]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  4%|▎         | 105/3000 [00:04<01:53, 25.54it/s, loss=nan, min_val_acc=0.0781]

tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


  4%|▎         | 110/3000 [00:04<01:51, 25.85it/s, loss=nan, min_val_acc=0.0781]


tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)
tensor(nan, grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 

In [23]:
# Convert the current trainer's result with Safebox.bmodelToModel and
# evaluate it on the validation split.
# NOTE(review): this cell's recorded execution count is lower than that of
# the training cell above, so the saved output may come from a different
# run — re-run the notebook in order before reading the number.
model = Safebox.bmodelToModel(optimizer.result())
print(
    "Accuracy ",
    evaluate_accuracy(val_dataset, model, num_samples=10000),
)

Accuracy  0.12359999865293503
