In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import torch
from src.optimizers.LagrangianTrainer import LagrangianTrainer
from src.optimizers.SimpleTrainer import SimpleTrainer
from src.utils import dataset
from src.optimizers.HypercubeTrainer import HypercubeTrainer
from src.utils.evaluation import evaluate_accuracy
from src.cert import Safebox

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
def get_model(output_dim=10):
    """Returns a simple CNN model."""
    model = torch.nn.Sequential(
        torch.nn.Conv2d(1, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 1, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(576, output_dim),
    ).to(DEVICE)
    return model
def get_model_cifar10(seed=0, output_dim=10):
    """Returns a simple CNN model."""
    torch.manual_seed(seed)
    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 8, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(8, 3, kernel_size=5, stride=1, padding=1),
        torch.nn.ReLU(),
        torch.nn.Flatten(),
        torch.nn.Linear(2352, output_dim),
    ).to(DEVICE)
    return model



In [5]:
from src.utils.dataset import reduce_dataset

train_dataset, val_dataset = dataset.get_fashion_mnist_dataset()
#train_dataset = reduce_dataset(train_dataset, num_samples=300)

In [8]:
simple_model = get_model()
trainer = SimpleTrainer(simple_model, device=DEVICE)
simple_model = trainer.train(train_dataset, val_dataset, loss_obj=0.000000000000001, max_iters=3000, batch_size=64, lr=1e-3)

100%|██████████| 3000/3000 [01:30<00:00, 33.29it/s, loss=0.477, val_acc=0.891]

----------  Training completed with loss  0 ----------





In [15]:
print("Accuracy Simple Training", evaluate_accuracy(val_dataset, simple_model, num_samples=len(val_dataset), device=DEVICE))

Accuracy Simple Training 0.8112999796867371


In [25]:
bmodel_test = Safebox.modelToBModel(simple_model)
Safebox.assign_epsilon(bmodel_test, 1e-4*1.5)
trainer = HypercubeTrainer(simple_model, "cuda", False)
trainer._interval_model = bmodel_test
print(trainer._evaluate_min_val_acc(val_dataset, len(val_dataset)))

0.6250999569892883


In [23]:
model = get_model(output_dim=10)
optimizer = HypercubeTrainer(model, device=DEVICE)
optimizer.set_volume_constrain(1e-4*1.5) # start with a small volume at first 
optimizer.train(
    train_dataset, val_dataset, loss_obj=0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4
)

100%|██████████| 3000/3000 [02:59<00:00, 16.69it/s, loss=0.588, min_val_acc=0.719]

----------  Training completed with loss  1 ----------





In [None]:
optimizer.set_volume_constrain(1e-4*5.0) # start with a small volume at first 
optimizer.train(
    train_dataset, val_dataset, loss_obj=0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4
)

 13%|█▎        | 378/3000 [00:18<02:00, 21.81it/s, loss=0.61, min_val_acc=0.719] 

In [44]:
optimizer._evaluate_min_val_acc(val_dataset, len(val_dataset))

0.7630999684333801

In [45]:
model = Safebox.bmodelToModel(optimizer.result())
print("Accuracy ", evaluate_accuracy(val_dataset, model, num_samples=len(val_dataset)))

Accuracy  0.8331000208854675


In [36]:
for layerSimple, layerFlat in zip(simple_model, optimizer.result()):
    if isinstance(layerSimple, torch.nn.Linear) or isinstance(layerSimple, torch.nn.Conv2d): 
        print("--- New Layer ---")
        print("Simple weight : ", layerSimple.weight.mean().round(decimals=6).item(), " std ", layerSimple.weight.std().round(decimals=6).item())
        print("Flat weight : ", layerFlat.W_c.mean().round(decimals=6).item(), " std ", layerFlat.W_c.std().round(decimals=6).item())
        print("Simple bias : ", layerSimple.bias.mean().round(decimals=6).item(), " std ", layerSimple.bias.std().round(decimals=6).item())
        print("Flat bias : ", layerFlat.b_c.mean().round(decimals=6).item(), " std ", layerFlat.b_c.std().round(decimals=6).item())


--- New Layer ---
Simple weight :  -0.02653699927031994  std  0.13423000276088715
Flat weight :  0.018327999860048294  std  0.14879299700260162
Simple bias :  0.05447600036859512  std  0.1779630035161972
Flat bias :  -0.046386998146772385  std  0.13203200697898865
--- New Layer ---
Simple weight :  0.024111000820994377  std  0.06942799687385559
Flat weight :  0.007294999901205301  std  0.07857400178909302
Simple bias :  0.11389800161123276  std  nan
Flat bias :  -0.05412000045180321  std  nan
--- New Layer ---
Simple weight :  -0.006694999989122152  std  0.03820300102233887
Flat weight :  -0.005133999977260828  std  0.0362280011177063
Simple bias :  -0.0016080000204965472  std  0.029248999431729317
Flat bias :  -0.014062999747693539  std  0.04829400032758713


  print("Simple bias : ", layerSimple.bias.mean().round(decimals=6).item(), " std ", layerSimple.bias.std().round(decimals=6).item())
  print("Flat bias : ", layerFlat.b_c.mean().round(decimals=6).item(), " std ", layerFlat.b_c.std().round(decimals=6).item())


### Lagrangian experiments

In [8]:
from src.optimizers.volumes import LogVolume

model = get_model(output_dim=10)
optimizer = LagrangianTrainer(model,LogVolume(epsilon=1e-8), device=DEVICE)
optimizer.set_volume_constrain(1e-4) # start with a small volume at first
print(optimizer._volume_function(optimizer._interval_model))
optimizer.train(
    train_dataset, val_dataset, loss_obj=-0.000000000000001, max_iters=3000, batch_size=64, lr=1e-4
)

tensor(-8.5171, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 3000/3000 [03:44<00:00, 13.37it/s, loss=0.405, min_val_acc=0.531, current_volume=-8.53] 

----------  Training completed with loss  0 ----------





In [9]:
interval_model = optimizer.result()
print("Accuracy ", evaluate_accuracy(val_dataset, Safebox.bmodelToModel(interval_model), num_samples=len(val_dataset)))
for layer in interval_model:
    if isinstance(layer, Safebox.BDense) or isinstance(layer, Safebox.BConv2d):
        print("Mean layer intervals W_u", layer.W_u.mean(), " std ", layer.W_u.std())
        print("Mean layer intervals W_l", layer.W_l.mean(), " std ", layer.W_l.std())
        print("Mean layer intervals b_u", layer.b_u.mean(), " std ", layer.b_u.std())
        print("Mean layer intervals b_l", layer.b_l.mean(), " std ", layer.b_l.std())

Accuracy  0.7109000086784363
Mean layer intervals W_u tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_l tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_u tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_l tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0002, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_u tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0003, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals W_l tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(0.0003, device='cuda:0', grad_fn=<StdBackward0>)
Mean layer intervals b_u tensor(6.2757e-05, device='cuda:0', grad_fn=<MeanBackward0>)  std  tensor(nan, dev

  print("Mean layer intervals b_u", layer.b_u.mean(), " std ", layer.b_u.std())
  print("Mean layer intervals b_l", layer.b_l.mean(), " std ", layer.b_l.std())


In [23]:
model = Safebox.bmodelToModel(optimizer.result())
print("Accuracy ", evaluate_accuracy(val_dataset, model, num_samples=10000))

Accuracy  0.12359999865293503
