In [5]:
import os

import torch
from torchvision import datasets, transforms

In [3]:
# Root directory handed to the torchvision dataset constructor as its download /
# cache location. Set the DATASET_PATH environment variable to point somewhere
# else; when it is unset or empty the original cluster path is used unchanged.
DATASET_PATH = (
    os.environ.get('DATASET_PATH')
    or '/nfs/homedirs/ayle/guided-research/SNIP-it/gitignored/data'
)

In [7]:
# Experiment configuration dictionary. Within this notebook only 'batch_size'
# is read (when building the DataLoader); the per-key notes are carried over
# from the original inline comments and describe the surrounding project.
arguments = {
    # ---- schedule / bookkeeping -------------------------------------------
    # evaluate every n batches
    'eval_freq': 1000,
    # save model every n epochs, besides before and after training
    'save_freq': 1e6,
    # size of batches, for Imagenette 128
    'batch_size': 512,
    # random seed
    'seed': 333,
    # process killed after n minutes (after finish of epoch).
    # NOTE(review): the previous comment said "one hour and 45 minutes", but
    # 6120 minutes is 102 hours -- confirm which one is intended.
    'max_training_minutes': 6120,
    # plot pictures to tensorboard every n epochs
    'plot_weights_freq': 50,
    # if pruning during training: how long to wait before starting
    # NOTE(review): this description and the one on 'prune_delay' look
    # swapped -- confirm against the trainer.
    'prune_freq': 1,
    # if pruning during training: 't' from algorithm box, interval between
    # pruning events, default=0
    'prune_delay': 0,
    'epochs': 80,
    # rewind to this epoch if rewinding is done
    'rewind_to': 0,
    # 's' in algorithm box, number of pruning steps for 'rule of thumb', TODO
    'snip_steps': 5,

    # ---- pruning amounts --------------------------------------------------
    # pruning rate passed to criterion at pruning event. however, most
    # override this
    'pruning_rate': 0.0,
    # grow back so much every epoch (for future criterions)
    'growing_rate': 0.0,
    # Prune until here, if structured in nodes, if unstructured in weights.
    # most criterions use this instead of the pruning_rate
    'pruning_limit': 0.0,

    # ---- optimisation / regularisation ------------------------------------
    'learning_rate': 2e-3,
    'grad_clip': 10,
    # added gaussian noise to gradients
    'grad_noise': 0,
    # weight decay
    'l2_reg': 5e-5,
    # l1-norm regularisation
    'l1_reg': 0,
    # lp regularisation with p < 1
    'lp_reg': 0,
    # l0 reg lambda hyperparam
    'l0_reg': 1.0,
    # hoyer reg lambda hyperparam
    'hoyer_reg': 1.0,
    # l0 reg beta ema hyperparam
    'beta_ema': 0.999,

    # ---- component selection ----------------------------------------------
    'loss': 'CrossEntropy',
    'optimizer': 'ADAM',
    # ResNet not supported with structured
    'model': 'LeNet5',
    'data_set': 'MNIST',
    'ood_data_set': 'FASHION',
    # options: SNIP, SNIPit, SNIPitDuring, UnstructuredRandom, GRASP,
    # HoyerSquare, IMP, // SNAPit, StructuredRandom, GateDecorators,
    # EfficientConvNets, GroupHoyerSquare
    'prune_criterion': 'EmptyCrit',
    # default: DefaultTrainer
    'train_scheme': 'DefaultTrainer',
    # only supported on unstructured?
    'test_scheme': 'AdversarialEvaluation',
    'attack': 'FGSM',
    'epsilons': [0.25],

    # ---- runtime ----------------------------------------------------------
    'device': 'cuda',
    'run_name': '',

    # ---- checkpoint to load -----------------------------------------------
    'checkpoint_name': '2021-04-25_16.04.37_lenet5_dense',
    'checkpoint_model': 'LeNet5_finished',

    # ---- behaviour switches (0 / 1) ---------------------------------------
    # speedup (disable) vs reproducibility (leave it)
    'disable_cuda_benchmark': 1,
    'eval': 0,
    # for the brave
    'disable_autoconfig': 0,
    # load all data into ram memory for speedups
    'preload_all_data': 0,
    # splits trainset into train and validationset, omits test set
    'tuning': 0,

    # keep statistics on the weights through training
    'track_weights': 0,
    # disable the ability to prune unstructured
    'disable_masking': 1,
    # enable the ability to rewind to previous weights
    'enable_rewinding': 1,
    # allow to prune outer layers (unstructured) or not (structured)
    'outer_layer_pruning': 1,
    # run with random-label experiment from zhang et al
    'random_shuffle_labels': 0,
    # run with l0 criterion, might overwrite some other arguments
    'l0': 0,
    # run in unstructured DeephoyerSquare criterion, might overwrite some
    # other arguments
    'hoyer_square': 0,
    # run in unstructured Group-DeephoyerSquare criterion, might overwrite
    # some other arguments
    'group_hoyer_square': 0,

    # ---- logging / plotting switches --------------------------------------
    'disable_histograms': 1,
    'disable_saliency': 1,
    'disable_confusion': 1,
    'disable_weightplot': 1,
    'disable_netplot': 1,
    'skip_first_plot': 1,

    # ---- data / model dimensions ------------------------------------------
    'input_dim': [1, 28, 28],
    'output_dim': 10,
    'hidden_dim': [512],
    'N': 60000,
}

In [8]:
# ToTensor is the only transform, so no normalisation is applied before the
# statistics are measured.
transformers = transforms.Compose([transforms.ToTensor()])

# KMNIST training split, stored under DATASET_PATH (download requested).
train_set = datasets.KMNIST(
    root=DATASET_PATH,
    train=True,
    transform=transformers,
    download=True,
)

# Disabled alternative: SVHN test split.
# test_set = datasets.SVHN(
#     DATASET_PATH,
#     split='test',
#     download=True,
#     transform=transformers
# )

# Shuffled loader over the training split; the batch size is taken from the
# `arguments` configuration dictionary.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    num_workers=6,
    pin_memory=True,
    shuffle=True,
    batch_size=arguments['batch_size'],
)

In [9]:
# Use the GPU when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Placeholders for the image height / width; the statistics cell overwrites
# them with dims 2 and 3 of the first batch.
h = w = 0

In [11]:
# Per-channel mean and standard deviation of the training images, computed in
# two passes over `train_loader`:
#   pass 1 accumulates the per-channel pixel sum            -> `mean`
#   pass 2 accumulates squared deviations from that mean    -> `std`
# Batches are indexed as 4-D tensors: dims 0, 2 and 3 are reduced and dim 1 is
# kept (keepdim=True), so `mean` / `std` broadcast against a batch.
# The divisor is the number of pixels actually iterated rather than
# len(train_set), so the result stays correct if the loader drops a partial
# batch or only samples part of the dataset.
chsum = None      # running per-channel accumulator
num_pixels = 0    # pixels seen per channel during pass 1
for batch_idx, (inputs, targets) in enumerate(train_loader):
    inputs = inputs.to(device)
    if batch_idx == 0:
        h, w = inputs.size(2), inputs.size(3)
        # Value range of the first batch only, as a quick sanity check.
        print(inputs.min(), inputs.max())
    batch_sum = inputs.sum(dim=(0, 2, 3), keepdim=True)
    chsum = batch_sum if chsum is None else chsum + batch_sum
    num_pixels += inputs.size(0) * inputs.size(2) * inputs.size(3)
if chsum is None:
    # Fail with a clear message instead of a NameError / TypeError further down.
    raise ValueError('train_loader yielded no batches; cannot compute mean/std')
mean = chsum / num_pixels
print('mean: %s' % mean.view(-1))

chsum = None
for batch_idx, (inputs, targets) in enumerate(train_loader):
    inputs = inputs.to(device)
    batch_sq = (inputs - mean).pow(2).sum(dim=(0, 2, 3), keepdim=True)
    chsum = batch_sq if chsum is None else chsum + batch_sq
# Sample standard deviation: divide by (count - 1), as the original did.
std = torch.sqrt(chsum / (num_pixels - 1))
print('std: %s' % std.view(-1))

tensor(0., device='cuda:0') tensor(1., device='cuda:0')
mean: tensor([0.1918], device='cuda:0')
std: tensor([0.3483], device='cuda:0')
