In [1]:
import os

# Later cells import project packages (`models`, `utils`, `experiments`) and use
# repo-relative paths, so the working directory must be the repository root.
# The default is the original hard-coded location; set SNIP_IT_ROOT to run the
# notebook from a different checkout without editing this cell.
PROJECT_ROOT = os.environ.get('SNIP_IT_ROOT', '/nfs/homedirs/ayle/guided-research/SNIP-it/')
os.chdir(PROJECT_ROOT)

In [2]:
import torch
from torchvision import datasets, transforms
import foolbox as fb
from experiments.main import load_checkpoint
from models import GeneralModel
from models.statistics.Metrics import Metrics
from utils.config_utils import *
from utils.model_utils import *
from utils.system_utils import *
from utils.attacks_utils import get_attack
from torch.utils.data.dataset import Dataset
from copy import deepcopy
import pickle
import time
import torch.nn.functional as F
from utils.metrics import calculate_aupr, calculate_auroc

In [3]:
# Flat configuration mapping consumed by `run` / `main` below.
# Comments describe the intent recorded by the original author; most of the
# training/pruning entries are only read by project code outside this notebook.
arguments = {
    # --- schedule / bookkeeping -------------------------------------------
    'eval_freq': 1000,  # evaluate every n batches
    'save_freq': 1e6,  # save model every n epochs, besides before and after training
    'batch_size': 512,  # size of batches, for Imagenette 128
    'seed': 1234,  # random seed
    'max_training_minutes': 6120,  # 6120 min = 102 h; process killed after n minutes (after finish of epoch)
    'plot_weights_freq': 50,  # plot pictures to tensorboard every n epochs

    # --- pruning ----------------------------------------------------------
    # NOTE(review): the descriptions of prune_freq / prune_delay look swapped — confirm.
    'prune_freq': 1,  # if pruning during training: how long to wait before starting
    'prune_delay': 0,  # if pruning during training: 't' from algorithm box, interval between pruning events, default=0
    'prune_to': 0,
    'epochs': 0,
    'rewind_to': 0,  # rewind to this epoch if rewinding is done
    'snip_steps': 5,  # 's' in algorithm box, number of pruning steps for 'rule of thumb', TODO
    'pruning_rate': 0.0,  # pruning rate passed to criterion at pruning event. however, most override this
    'growing_rate': 0.0,  # grow back so much every epoch (for future criterions)
    'pruning_limit': 0.5,  # prune until here (nodes if structured, weights if unstructured); most criterions use this instead of pruning_rate
    'local_pruning': 0,

    # --- optimisation / regularisation ------------------------------------
    'learning_rate': 2e-3,
    'grad_clip': 10,
    'grad_noise': 0,  # added gaussian noise to gradients
    'l2_reg': 5e-5,  # weight decay
    'l1_reg': 0,  # l1-norm regularisation
    'lp_reg': 0,  # lp regularisation with p < 1
    'l0_reg': 1.0,  # l0 reg lambda hyperparam
    'hoyer_reg': 0.001,  # hoyer reg lambda hyperparam
    'beta_ema': 0.999,  # l0 reg beta ema hyperparam

    # --- model / data / attacks -------------------------------------------
    'loss': 'CrossEntropy',
    'optimizer': 'ADAM',
    'model': 'ResNet18',  # ResNet not supported with structured
    'data_set': 'CIFAR10',
    'ood_data_set': 'SVHN',
    'ood_data_set_prune': 'SVHN',
    # options: SNIP, SNIPit, SNIPitDuring, UnstructuredRandom, GRASP, HoyerSquare, IMP,
    # // SNAPit, StructuredRandom, GateDecorators, EfficientConvNets, GroupHoyerSquare
    'prune_criterion': 'EmptyCrit',
    'train_scheme': 'DefaultTrainer',  # default: DefaultTrainer
    'attack': 'FGSM',
    'epsilon': 12,
    'eval_ood_data_sets': ['SVHN', 'CIFAR100'],
    'eval_attacks': ['FGSM', 'L2FGSM'],
    'eval_epsilons': [6, 12, 48],

    # --- runtime ----------------------------------------------------------
    'device': 'cuda',
    'results_dir': "tmp",

    'checkpoint_name': None,
    'checkpoint_model': None,

    'disable_cuda_benchmark': 1,  # speedup (disable) vs reproducibility (leave it)
    'eval': 0,
    'disable_autoconfig': 0,  # for the brave
    'preload_all_data': 0,  # load all data into ram memory for speedups
    'tuning': 0,  # splits trainset into train and validationset, omits test set

    # --- feature switches -------------------------------------------------
    'get_hooks': 0,
    'track_weights': 0,  # keep statistics on the weights through training
    'disable_masking': 1,  # disable the ability to prune unstructured
    'enable_rewinding': 0,  # enable the ability to rewind to previous weights
    'outer_layer_pruning': 1,  # allow to prune outer layers (unstructured) or not (structured)
    'first_layer_dense': 0,
    'random_shuffle_labels': 0,  # run with random-label experiment from zhang et al
    'l0': 0,  # run with l0 criterion, might overwrite some other arguments
    'hoyer_square': 0,  # run in unstructured DeephoyerSquare criterion, might overwrite some other arguments
    'group_hoyer_square': 0,  # run in unstructured Group-DeephoyerSquare criterion, might overwrite some other arguments

    # --- logging / plotting switches --------------------------------------
    'disable_histograms': 0,
    'disable_saliency': 0,
    'disable_confusion': 0,
    'disable_weightplot': 0,
    'disable_netplot': 0,
    'skip_first_plot': 0,
    'disable_activations': 0,

    # --- data geometry ----------------------------------------------------
    # A previous variant used 'input_dim': [1, 28, 28] with the same
    # output_dim / hidden_dim / N values.
    'input_dim': [3, 32, 32],
    'output_dim': 10,
    'hidden_dim': [512],
    'N': 60000,
}

In [4]:
import logging
from sacred import Experiment
import numpy as np
import seml

import sys
import warnings

sys.path.append('.')

from models import GeneralModel
from models.statistics.Metrics import Metrics
from utils.config_utils import *
from utils.model_utils import *
from utils.system_utils import *

import torch
from torch.utils.data.dataset import Dataset

from torchvision import transforms

from lipEstimation.lipschitz_utils import compute_module_input_sizes
from lipEstimation.lipschitz_approximations import lipschitz_spectral_ub


def main(
        arguments,
        metrics: Metrics
):
    """Score OOD detection from the disagreement between two checkpoints.

    Two pickled ResNet18 checkpoints are loaded (per their directory names:
    an `EmptyCrit` run with pruning-limit 0.0 and a `SNIPit` run with
    pruning-limit 0.5). For every sample of the in-distribution test loader
    and of the OOD test loader, the per-sample KL divergence
    KL(softmax(model1(x)) || softmax(model2(x))) is used as the OOD score.
    In-distribution samples are labelled 0 and OOD samples 1.

    Args:
        arguments: configuration mapping; this function reads
            'disable_cuda_benchmark', 'data_set' and 'ood_data_set' and
            forwards the whole mapping to the project helpers.
        metrics: object whose `log_line` callable is used for logging.

    Returns:
        dict with keys 'auroc' and 'aupr' as computed by `calculate_auroc`
        and `calculate_aupr` on the concatenated labels and scores.
    """
    # `out` is published at module level, as in the original implementation.
    global out
    out = metrics.log_line
    out(f"starting at {get_date_stamp()}")

    # hardware
    device = configure_device(arguments)

    if arguments['disable_cuda_benchmark']:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # for reproducibility
    configure_seeds(arguments, device)

    # filter for incompatible properties
    assert_compatibilities(arguments)

    # pre-trained checkpoints (absolute paths are specific to the original machine)
    path2 = '/nfs/homedirs/ayle/guided-research/SNIP-it/gitignored/results/ResNet18/2021-07-27_05.54.29_model=ResNet18_dataset=CIFAR10_prune-criterion=SNIPit_pruning-limit=0.5_prune-freq=1_prune-delay=0_outer-layer-pruning=1_prune-to=10_rewind-to=0_train-scheme=DefaultTrainer_seed=2345/models/ResNet18_mod_finished.pickle'
    path1 = '/nfs/homedirs/ayle/guided-research/SNIP-it/gitignored/results/ResNet18/2021-07-26_23.33.17_model=ResNet18_dataset=CIFAR10_prune-criterion=EmptyCrit_pruning-limit=0.0_prune-freq=1_prune-delay=0_outer-layer-pruning=1_prune-to=10_rewind-to=0_train-scheme=DefaultTrainer_seed=2345/models/ResNet18_mod_finished.pickle'

    model1 = load_checkpoint(path1).eval()
    model2 = load_checkpoint(path2).eval()

    # only the test splits are needed; the train loaders are discarded
    _, test_loader = find_right_model(
        DATASETS, arguments['data_set'],
        arguments=arguments
    )
    _, ood_loader = find_right_model(
        DATASETS, arguments['ood_data_set'],
        arguments=arguments
    )

    out("EVALUATING...")

    in_dist_scores = _kl_disagreement_scores(model1, model2, test_loader, device)
    ood_scores = _kl_disagreement_scores(model1, model2, ood_loader, device)

    kl_preds = np.concatenate((in_dist_scores, ood_scores))
    # 0 = in-distribution, 1 = out-of-distribution
    true_labels = np.concatenate((np.zeros(len(in_dist_scores)), np.ones(len(ood_scores))))

    return {
        'auroc': calculate_auroc(true_labels, kl_preds),
        'aupr': calculate_aupr(true_labels, kl_preds),
    }


def _kl_disagreement_scores(reference_model, approx_model, loader, device):
    """Return per-sample KL(p_reference || p_approx) over all batches of `loader`.

    Args:
        reference_model: callable producing logits; its softmax is the KL target.
        approx_model: callable producing logits; its log-softmax is the KL input.
        loader: iterable of `(inputs, labels)` batches; labels are ignored.
        device: device the inputs are moved to before the forward passes.

    Returns:
        1-D float64 numpy array with one score per sample (empty if the
        loader yields no batches).
    """
    # The float64 empty seed keeps the result dtype stable and makes an
    # empty loader produce an empty array instead of an error.
    chunks = [np.zeros(0)]
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(device)
            reference_logits = reference_model(x)
            approx_logits = approx_model(x)
            # log_softmax stays finite where log(softmax(.)) would underflow
            # to -inf and turn the divergence into inf/nan.
            kl = F.kl_div(
                F.log_softmax(approx_logits, -1),
                F.softmax(reference_logits, -1),
                reduction='none',
            ).sum(-1)
            chunks.append(kl.cpu().numpy())
    # single concatenation instead of re-allocating the array every batch
    return np.concatenate(chunks)


def assert_compatibilities(arguments):
    """Run the configuration sanity rules against `arguments`.

    Every rule is handed to `check_incompatible_props` as a list of flags
    followed by descriptive labels. What that helper does with them is
    defined in the project utilities (presumably it rejects the
    configuration when more than one flag is truthy — confirm there).
    """
    # the l0 flag versus the configured loss and training scheme
    l0_rules = (('loss', "L0CrossEntropy"), ('train_scheme', "L0Trainer"))
    for key, l0_value in l0_rules:
        check_incompatible_props([arguments[key] != l0_value, arguments['l0']], "l0", arguments[key])

    # the three special modes are checked together
    mode_flags = [arguments[flag] for flag in ('l0', 'group_hoyer_square', 'hoyer_square')]
    check_incompatible_props(mode_flags, "Choose one mode, not multiple")

    # structured / group criteria versus residual architectures
    criterion = arguments['prune_criterion']
    structure_flags = [
        "Structured" in criterion,
        "Group" in criterion,
        "ResNet" in arguments['model'],
    ]
    check_incompatible_props(structure_flags, "structured", "residual connections")
    # todo: add more


def load_checkpoint(path):
    """Unpickle and return the object stored at `path`.

    This definition replaces the `load_checkpoint` imported from
    `experiments.main` at the top of the notebook.

    Caution: unpickling can execute arbitrary code, so only point this at
    checkpoint files you trust.
    """
    with open(path, 'rb') as checkpoint_file:
        return pickle.load(checkpoint_file)


def log_start_run(arguments, out):
    """Record environment details on `arguments` and log them.

    Stores the PyTorch version, the Python version and the current working
    directory under the names `PyTorch_version`, `PyThon_version` and `pwd`,
    then writes the same information plus the full configuration via `out`.

    Args:
        arguments: run configuration. May be a `dict` (as used throughout
            this notebook) or an attribute-style object such as an argparse
            namespace; the values are stored as items or attributes
            accordingly.
        out: logging callable accepting one or more positional values.
    """
    def _record(name, value):
        # A plain dict rejects attribute assignment, so store an item instead.
        if isinstance(arguments, dict):
            arguments[name] = value
        else:
            setattr(arguments, name, value)

    _record('PyTorch_version', torch.__version__)
    _record('PyThon_version', sys.version)
    _record('pwd', os.getcwd())
    out("PyTorch version:", torch.__version__, "Python version:", sys.version)
    out("Working directory: ", os.getcwd())
    out("CUDA avalability:", torch.cuda.is_available(), "CUDA version:", torch.version.cuda)
    out(arguments)

def run(arguments):
    """Create a Metrics tracker configured from `arguments` and call `main`.

    Args:
        arguments: configuration mapping; 'batch_size', 'eval_freq' and
            'results_dir' are read here, and the whole mapping is passed on
            to `main`.

    Returns:
        Whatever `main(arguments, metrics)` returns.
    """
    metrics = Metrics()
    # `main` takes its logger from `metrics.log_line` itself, so no local
    # logger alias is needed here.
    metrics._batch_size = arguments['batch_size']
    metrics._eval_freq = arguments['eval_freq']
    set_results_dir(arguments["results_dir"])
    return main(arguments, metrics)

In [5]:
# Execute the evaluation with the configuration defined above; `run` returns
# the dict built by `main`, holding the 'auroc' and 'aupr' entries.
results = run(arguments)

starting at 2021-08-05_15.24.59
Using downloaded and verified file: gitignored/data/train_32x32.mat
Using downloaded and verified file: gitignored/data/test_32x32.mat
EVALUATING...


In [6]:
# Display the metrics dict returned by `run`.
results

{'auroc': 0.863414153733866, 'aupr': 0.8998101736883848}