In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import torch
from src.optimizers.LagrangianTrainer import LagrangianTrainer
from src.optimizers.SimpleTrainer import SimpleTrainer
from src.utils import dataset
from src.optimizers.HypercubeTrainer import HypercubeTrainer
from src.utils.evaluation import evaluate_accuracy
from src.cert import Safebox

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
def get_model(output_dim=10):
    """Returns a simple CNN model."""
    model = torch.nn.Sequential(
        torch.nn.Conv2d(1, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 1, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(576, output_dim),
    ).to(DEVICE)
    return model
def get_model_cifar10(seed=0, output_dim=10):
    """Returns a simple CNN model."""
    torch.manual_seed(seed)
    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 3, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(2352, output_dim),
    ).to(DEVICE)
    return model



In [5]:
from src.utils.dataset import reduce_dataset

train_dataset, val_dataset = dataset.get_fashion_mnist_dataset()
train_dataset = reduce_dataset(train_dataset, num_samples=300)

In [32]:
simple_model = get_model()
trainer = SimpleTrainer(simple_model, device=DEVICE)
simple_model = trainer.train(train_dataset, val_dataset, loss_obj=0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4)

100%|██████████| 3000/3000 [01:21<00:00, 36.89it/s, loss=0.0961, val_acc=0.719]

----------  Training completed with loss  0 ----------





In [36]:
print("Accuracy Simple Training", evaluate_accuracy(val_dataset, simple_model, num_samples=len(val_dataset), device=DEVICE))

Accuracy Simple Training 0.7096999883651733


In [34]:
model = get_model(output_dim=10)
optimizer = HypercubeTrainer(model, device=DEVICE)
optimizer.set_volume_constrain(1e-5*5) # start with a small volume at first 
optimizer.train(
    train_dataset, val_dataset, loss_obj=0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4
)

100%|██████████| 3000/3000 [02:41<00:00, 18.58it/s, loss=0.0361, min_val_acc=0.594]

----------  Training completed with loss  0 ----------





In [37]:
model = Safebox.bmodelToModel(optimizer.result())
print("Accuracy ", evaluate_accuracy(val_dataset, model, num_samples=len(val_dataset)))

Accuracy  0.725600004196167


### Lagrangian experiments

In [8]:
from src.optimizers.volumes import LogVolume

model = get_model(output_dim=10)
optimizer = LagrangianTrainer(model,LogVolume(epsilon=1e-8), device=DEVICE)
optimizer.set_volume_constrain(1e-4) # start with a small volume at first
print(optimizer._volume_function(optimizer._interval_model))
optimizer.train(
    train_dataset, val_dataset, loss_obj=-0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4
)

tensor(-8.5171, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 3000/3000 [03:44<00:00, 13.37it/s, loss=0.405, min_val_acc=0.531, current_volume=-8.53] 

----------  Training completed with loss  0 ----------





In [9]:
interval_model = optimizer.result()
print("Accuracy ", evaluate_accuracy(val_dataset, Safebox.bmodelToModel(interval_model), num_samples=len(val_dataset)))
for layer in interval_model:
    if isinstance(layer, Safebox.BDense) or isinstance(layer, Safebox.BConv2d):
        print("Mean layer intervals W_u", layer.W_u.mean(), " std ", layer.W_u.std())
        print("Mean layer intervals W_l", layer.W_l.mean(), " std ", layer.W_l.std())
        print("Mean layer intervals b_u", layer.b_u.mean(), " std ", layer.b_u.std())
        print("Mean layer intervals b_l", layer.b_l.mean(), " std ", layer.b_l.std())

Accuracy  0.7109000086784363
Mean layer intervals W_u tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_l tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_u tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_l tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_u tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0003, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_l tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0003, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_u tensor(6.2757e-05, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(nan, dev

  print("Mean layer intervals b_u", layer.b_u.mean(), " std ", layer.b_u.std())
  print("Mean layer intervals b_l", layer.b_l.mean(), " std ", layer.b_l.std())


In [23]:
model = Safebox.bmodelToModel(optimizer.result())
print("Accuracy ", evaluate_accuracy(val_dataset, model, num_samples=10000))

Accuracy  0.12359999865293503
