Evaluate the models

# Stochastic Adversarial Training (StochAT)

### SoTA

vanila SGD: 
MNIST - 99%+ (most cnns), CIFAR10 - 93%+ (resnet18), 96%+ (wideresnet) 

MNIST:

adversarial attacks: 
l-inf @ eps = 80/255 @20 steps: TRADES - 96.07% - (4 layer cnn), MART 96.4%, MMA 95.5%, PGD - 96.01% - (4 layer cnn)

adversarial attacks:
l-2 @ eps = 32/255 (check): TRADES, MMA, PGD

CIFAR10:

adversarial attacks: 
l-inf @ eps = 8/255 @20 steps: 
TRADES 53-56% - (WRN-34-10), MART 57-58% (WRN-34-10), MMA 47%, PGD 48% - (WRN-32-10)// 49% - (WRN-34-10), Std - 0.03%
https://openreview.net/pdf?id=rklOg6EFwS (Table 4)

adversarial attacks: 
l-inf @ eps = 8/255 @20 steps: 
[ResNet10] TRADES 45.4%, MART 46.6%, MMA 37.26%, PGD 42.27%, Std 0.14%

Benign accuracies: TRADES 84.92%, MART 83.62%, MMA 84.36, PGD 87.14%, Std 95.8% [wideresnet]
https://openreview.net/pdf?id=Ms9zjhVB5R (Table 1)

adversarial attacks:
l-2 @ eps = 32/255 (check): TRADES, MART, MMA, PGD

TBD: CWinf attacks

## Pretrained models for comparison

download pretrained models and place in ../trainedmodels/MNIST or ../trainedmodels/CIFAR10 respectively

### TRADES :
https://github.com/yaodongyu/TRADES (MNIST: small cnn, CIFAR10: WideResNet34)
### MMA : 
https://github.com/BorealisAI/mma_training (MNIST: lenet5, CIFAR10: WideResNet28)
### MART :
 https://github.com/YisenWang/MART (CIFAR10: ResNet18 and WideResNet34)

## IMPORT LIBRARIES

In [1]:
import numpy as np
import pandas as pd
from torch import nn, optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision import transforms, datasets
from multiprocessing import cpu_count
from collections import OrderedDict
import matplotlib.pyplot as plt
import torch
import olympic
from typing import Union, Callable, Tuple
import sys
sys.path.append('../adversarial/')
sys.path.append('../architectures/')
from functional import boundary, iterated_fgsm, local_search, pgd, entropySmoothing
from ESGD_utils import *
import pickle
import time
import torch.backends.cudnn as cudnn
import argparse, math, random
import ESGD_optim
from trades import trades_loss

In [2]:
from torch.autograd import Variable

In [3]:
from torch.optim.lr_scheduler import StepLR

In [4]:
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [5]:
DEVICE = 'cpu'

# LOAD DATA

In [6]:
#place data folders outside working directory

In [7]:
kwargs = {'num_workers': 4, 'pin_memory': True}

In [8]:
args = {}
args['test_batch_size'] = 128
args['train_batch_size'] = 128
args['no_cuda'] = False
args['epsilon'] = 0.031
args['num_steps'] = 10
args['step_size'] = 0.007
args['random'] =True,
args['white_box_attack']=True
args['log_interval'] = 100
args['beta'] = 6.0
args['seed'] = 1
args['lr'] = 0.1
args['momentum'] = 0.9
args['epochs'] = 5
args['batch_size'] = 128
args['save_freq'] = 3

In [9]:
dataset = 'CIFAR10' # [MNIST, CIFAR10]
if dataset == 'MNIST':
    transform = transforms.Compose([
    transforms.ToTensor()])
    train = datasets.MNIST('../../data/MNIST', train=True, transform=transform, download=True)
    val = datasets.MNIST('../../data/MNIST', train=False, transform=transform, download=True)
elif dataset == 'CIFAR10':
    # setup data loader
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    train = datasets.CIFAR10('../../data/CIFAR10', train=True, transform=transform_train, download=True)
    val = datasets.CIFAR10('../../data/CIFAR10', train=False, transform=transform_test, download=True)
    
train_loader = DataLoader(train, batch_size=args['test_batch_size'], shuffle=True, **kwargs)
val_loader = DataLoader(val, batch_size=args['train_batch_size'], shuffle=False, **kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
transform_train

Compose(
    RandomCrop(size=(32, 32), padding=4)
    RandomHorizontalFlip(p=0.5)
    ToTensor()
)

# INITIALIZE NETWORK

In [11]:
if dataset=='CIFAR10':
    #[ResNet18,ResNet34,ResNet50,WideResNet]
    from resnet import ResNet18,ResNet34,ResNet50
    from wideresnet import WideResNet
    Net = WideResNet
    NetName = 'WideResNet'

In [12]:
Net

wideresnet.WideResNet

# RANDOM SEED 

In [13]:
seed = args['seed']
torch.set_num_threads(2)
if DEVICE=='cuda':
    torch.cuda.set_device(-1)
    torch.cuda.manual_seed(seed)
    cudnn.benchmark = True
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7fd54bd6e110>

In [14]:
def dataentropy(method,model, device, train_loader, adversary,L,step,eps,norm,random):
    totalloss = 0
    totalcorrect = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        ypred = model(data)
        
        sgd_loss = nn.CrossEntropyLoss()
        # calculate robust loss per batch
        loss, correct = batchentropy(model,sgd_loss,data,target,adversary,L,step,eps,norm)
        totalcorrect += correct
    print('robust train accuracy:',100*totalcorrect/len(train_loader.dataset))   
    print('data entropy:', totalloss/len(train_loader.dataset))

# TRAIN MODEL USING SAT

In [15]:
def batchentropy(model, loss_fn, x, y, adversary, L, step, eps, norm):
    """Performs a single update against a specified adversary"""
    model.train()
    # Adversial perturbation
    alpha=0.9
    loss = 0
    
    for l in range(L):     
        
        if l==0: ## initialize using random perturbation of true x, run for one epoch
            k=1
            random=True
            xp = None
            projector=False
        elif l>0 and l<L-1: ## initialize with previous iterate of adversarial perturbation, run one epoch
            k=1
            random=False
            xp=x_adv
            projector = False
        elif l == L-1: ## initialize with previous iterate, run one epoch, project to epsilon ball
            k=1
            random=False
            xp = x_adv
            projector=True
            
        x_adv = adversary(model, x, y, loss_fn, xp=xp, step=step, eps=eps, norm=norm, random=random, ep=1e-3,projector=projector)
        
        y_pred = model(x_adv)
        pred = y_pred.max(1, keepdim=True)[1]
        correct = pred.eq(y.view_as(pred)).sum().item()
        loss = (1-alpha)*loss + alpha*loss_fn(y_pred, y)

    return loss, correct

In [25]:
model_dir = '../SmallCNN_MNIST'

In [26]:
ls ../SmallCNN_MNIST

BEST_model-nn-epoch23-robacc96.pt  model-nn-epoch27-robacc96.pt
model-nn-epoch10-robacc52.pt       model-nn-epoch28-robacc83.pt
model-nn-epoch10-robacc75.pt       model-nn-epoch28-robacc90.pt
model-nn-epoch10-robacc81.pt       model-nn-epoch28-robacc96.pt
model-nn-epoch10-robacc83.pt       model-nn-epoch29-robacc83.pt
model-nn-epoch10-robacc88.pt       model-nn-epoch29-robacc90.pt
model-nn-epoch10-robacc89.pt       model-nn-epoch29-robacc96.pt
model-nn-epoch10-robacc94.pt       model-nn-epoch2-robacc10.pt
model-nn-epoch10-robacc95.pt       model-nn-epoch2-robacc11.pt
model-nn-epoch11-robacc82.pt       model-nn-epoch2-robacc50.pt
model-nn-epoch11-robacc89.pt       model-nn-epoch2-robacc56.pt
model-nn-epoch11-robacc94.pt       model-nn-epoch2-robacc62.pt
model-nn-epoch11-robacc95.pt       model-nn-epoch2-robacc67.pt
model-nn-epoch11-robacc96.pt       model-nn-epoch2-robacc70.pt
model-nn-epoch12-robacc81.pt       model-nn-epoch2-robacc73.pt
model-nn-epoch12-robacc89.pt     

In [None]:
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

In [18]:
model_SATInf = Net().to(DEVICE)
#model_SATInf = nn.DataParallel(model_SATInf)
#load state dict here
#w = 46
#string = '../WRN_ATENT_lr0p1_decay/model-nn-epoch'+str(76)+'-robacc57'+'.pt' #//trained till 49 in WRN_ATTENT, till 74 in WRN_ATTENT2


In [19]:
string

'../WRN_ATENT_lr0p1_decay/model-nn-epoch76-robacc57.pt'

In [None]:
class WrappedModel(nn.Module):
    def __init__(self, module):
        super(WrappedModel, self).__init__()
        self.module = module # that I actually define.
    def forward(self, x):
        return self.module(x)

model = getattr(models, args.model)(args)
model = WrappedModel(model)
state_dict = torch.load(modelname)['state_dict']
model.load_state_dict(state_dict)

In [None]:
 net.module.load_state_dict(pertained_weights)

In [22]:
model_SATInf.module.load_state_dict(torch.load(string,map_location='cpu'))
#eval_train(model_SATInf, DEVICE, train_loader)

RuntimeError: Error(s) in loading state_dict for WideResNet:
	Missing key(s) in state_dict: "conv1.weight", "block1.layer.0.bn1.weight", "block1.layer.0.bn1.bias", "block1.layer.0.bn1.running_mean", "block1.layer.0.bn1.running_var", "block1.layer.0.conv1.weight", "block1.layer.0.bn2.weight", "block1.layer.0.bn2.bias", "block1.layer.0.bn2.running_mean", "block1.layer.0.bn2.running_var", "block1.layer.0.conv2.weight", "block1.layer.0.convShortcut.weight", "block1.layer.1.bn1.weight", "block1.layer.1.bn1.bias", "block1.layer.1.bn1.running_mean", "block1.layer.1.bn1.running_var", "block1.layer.1.conv1.weight", "block1.layer.1.bn2.weight", "block1.layer.1.bn2.bias", "block1.layer.1.bn2.running_mean", "block1.layer.1.bn2.running_var", "block1.layer.1.conv2.weight", "block1.layer.2.bn1.weight", "block1.layer.2.bn1.bias", "block1.layer.2.bn1.running_mean", "block1.layer.2.bn1.running_var", "block1.layer.2.conv1.weight", "block1.layer.2.bn2.weight", "block1.layer.2.bn2.bias", "block1.layer.2.bn2.running_mean", "block1.layer.2.bn2.running_var", "block1.layer.2.conv2.weight", "block1.layer.3.bn1.weight", "block1.layer.3.bn1.bias", "block1.layer.3.bn1.running_mean", "block1.layer.3.bn1.running_var", "block1.layer.3.conv1.weight", "block1.layer.3.bn2.weight", "block1.layer.3.bn2.bias", "block1.layer.3.bn2.running_mean", "block1.layer.3.bn2.running_var", "block1.layer.3.conv2.weight", "block1.layer.4.bn1.weight", "block1.layer.4.bn1.bias", "block1.layer.4.bn1.running_mean", "block1.layer.4.bn1.running_var", "block1.layer.4.conv1.weight", "block1.layer.4.bn2.weight", "block1.layer.4.bn2.bias", "block1.layer.4.bn2.running_mean", "block1.layer.4.bn2.running_var", "block1.layer.4.conv2.weight", "sub_block1.layer.0.bn1.weight", "sub_block1.layer.0.bn1.bias", "sub_block1.layer.0.bn1.running_mean", "sub_block1.layer.0.bn1.running_var", "sub_block1.layer.0.conv1.weight", "sub_block1.layer.0.bn2.weight", "sub_block1.layer.0.bn2.bias", "sub_block1.layer.0.bn2.running_mean", "sub_block1.layer.0.bn2.running_var", "sub_block1.layer.0.conv2.weight", "sub_block1.layer.0.convShortcut.weight", "sub_block1.layer.1.bn1.weight", "sub_block1.layer.1.bn1.bias", "sub_block1.layer.1.bn1.running_mean", "sub_block1.layer.1.bn1.running_var", "sub_block1.layer.1.conv1.weight", "sub_block1.layer.1.bn2.weight", "sub_block1.layer.1.bn2.bias", "sub_block1.layer.1.bn2.running_mean", "sub_block1.layer.1.bn2.running_var", "sub_block1.layer.1.conv2.weight", "sub_block1.layer.2.bn1.weight", "sub_block1.layer.2.bn1.bias", "sub_block1.layer.2.bn1.running_mean", "sub_block1.layer.2.bn1.running_var", "sub_block1.layer.2.conv1.weight", "sub_block1.layer.2.bn2.weight", "sub_block1.layer.2.bn2.bias", "sub_block1.layer.2.bn2.running_mean", "sub_block1.layer.2.bn2.running_var", "sub_block1.layer.2.conv2.weight", "sub_block1.layer.3.bn1.weight", "sub_block1.layer.3.bn1.bias", "sub_block1.layer.3.bn1.running_mean", "sub_block1.layer.3.bn1.running_var", "sub_block1.layer.3.conv1.weight", "sub_block1.layer.3.bn2.weight", "sub_block1.layer.3.bn2.bias", "sub_block1.layer.3.bn2.running_mean", "sub_block1.layer.3.bn2.running_var", "sub_block1.layer.3.conv2.weight", "sub_block1.layer.4.bn1.weight", "sub_block1.layer.4.bn1.bias", "sub_block1.layer.4.bn1.running_mean", "sub_block1.layer.4.bn1.running_var", "sub_block1.layer.4.conv1.weight", "sub_block1.layer.4.bn2.weight", "sub_block1.layer.4.bn2.bias", "sub_block1.layer.4.bn2.running_mean", "sub_block1.layer.4.bn2.running_var", "sub_block1.layer.4.conv2.weight", "block2.layer.0.bn1.weight", "block2.layer.0.bn1.bias", "block2.layer.0.bn1.running_mean", "block2.layer.0.bn1.running_var", "block2.layer.0.conv1.weight", "block2.layer.0.bn2.weight", "block2.layer.0.bn2.bias", "block2.layer.0.bn2.running_mean", "block2.layer.0.bn2.running_var", "block2.layer.0.conv2.weight", "block2.layer.0.convShortcut.weight", "block2.layer.1.bn1.weight", "block2.layer.1.bn1.bias", "block2.layer.1.bn1.running_mean", "block2.layer.1.bn1.running_var", "block2.layer.1.conv1.weight", "block2.layer.1.bn2.weight", "block2.layer.1.bn2.bias", "block2.layer.1.bn2.running_mean", "block2.layer.1.bn2.running_var", "block2.layer.1.conv2.weight", "block2.layer.2.bn1.weight", "block2.layer.2.bn1.bias", "block2.layer.2.bn1.running_mean", "block2.layer.2.bn1.running_var", "block2.layer.2.conv1.weight", "block2.layer.2.bn2.weight", "block2.layer.2.bn2.bias", "block2.layer.2.bn2.running_mean", "block2.layer.2.bn2.running_var", "block2.layer.2.conv2.weight", "block2.layer.3.bn1.weight", "block2.layer.3.bn1.bias", "block2.layer.3.bn1.running_mean", "block2.layer.3.bn1.running_var", "block2.layer.3.conv1.weight", "block2.layer.3.bn2.weight", "block2.layer.3.bn2.bias", "block2.layer.3.bn2.running_mean", "block2.layer.3.bn2.running_var", "block2.layer.3.conv2.weight", "block2.layer.4.bn1.weight", "block2.layer.4.bn1.bias", "block2.layer.4.bn1.running_mean", "block2.layer.4.bn1.running_var", "block2.layer.4.conv1.weight", "block2.layer.4.bn2.weight", "block2.layer.4.bn2.bias", "block2.layer.4.bn2.running_mean", "block2.layer.4.bn2.running_var", "block2.layer.4.conv2.weight", "block3.layer.0.bn1.weight", "block3.layer.0.bn1.bias", "block3.layer.0.bn1.running_mean", "block3.layer.0.bn1.running_var", "block3.layer.0.conv1.weight", "block3.layer.0.bn2.weight", "block3.layer.0.bn2.bias", "block3.layer.0.bn2.running_mean", "block3.layer.0.bn2.running_var", "block3.layer.0.conv2.weight", "block3.layer.0.convShortcut.weight", "block3.layer.1.bn1.weight", "block3.layer.1.bn1.bias", "block3.layer.1.bn1.running_mean", "block3.layer.1.bn1.running_var", "block3.layer.1.conv1.weight", "block3.layer.1.bn2.weight", "block3.layer.1.bn2.bias", "block3.layer.1.bn2.running_mean", "block3.layer.1.bn2.running_var", "block3.layer.1.conv2.weight", "block3.layer.2.bn1.weight", "block3.layer.2.bn1.bias", "block3.layer.2.bn1.running_mean", "block3.layer.2.bn1.running_var", "block3.layer.2.conv1.weight", "block3.layer.2.bn2.weight", "block3.layer.2.bn2.bias", "block3.layer.2.bn2.running_mean", "block3.layer.2.bn2.running_var", "block3.layer.2.conv2.weight", "block3.layer.3.bn1.weight", "block3.layer.3.bn1.bias", "block3.layer.3.bn1.running_mean", "block3.layer.3.bn1.running_var", "block3.layer.3.conv1.weight", "block3.layer.3.bn2.weight", "block3.layer.3.bn2.bias", "block3.layer.3.bn2.running_mean", "block3.layer.3.bn2.running_var", "block3.layer.3.conv2.weight", "block3.layer.4.bn1.weight", "block3.layer.4.bn1.bias", "block3.layer.4.bn1.running_mean", "block3.layer.4.bn1.running_var", "block3.layer.4.conv1.weight", "block3.layer.4.bn2.weight", "block3.layer.4.bn2.bias", "block3.layer.4.bn2.running_mean", "block3.layer.4.bn2.running_var", "block3.layer.4.conv2.weight", "bn1.weight", "bn1.bias", "bn1.running_mean", "bn1.running_var", "fc.weight", "fc.bias". 
	Unexpected key(s) in state_dict: "module.conv1.weight", "module.block1.layer.0.bn1.weight", "module.block1.layer.0.bn1.bias", "module.block1.layer.0.bn1.running_mean", "module.block1.layer.0.bn1.running_var", "module.block1.layer.0.bn1.num_batches_tracked", "module.block1.layer.0.conv1.weight", "module.block1.layer.0.bn2.weight", "module.block1.layer.0.bn2.bias", "module.block1.layer.0.bn2.running_mean", "module.block1.layer.0.bn2.running_var", "module.block1.layer.0.bn2.num_batches_tracked", "module.block1.layer.0.conv2.weight", "module.block1.layer.0.convShortcut.weight", "module.block1.layer.1.bn1.weight", "module.block1.layer.1.bn1.bias", "module.block1.layer.1.bn1.running_mean", "module.block1.layer.1.bn1.running_var", "module.block1.layer.1.bn1.num_batches_tracked", "module.block1.layer.1.conv1.weight", "module.block1.layer.1.bn2.weight", "module.block1.layer.1.bn2.bias", "module.block1.layer.1.bn2.running_mean", "module.block1.layer.1.bn2.running_var", "module.block1.layer.1.bn2.num_batches_tracked", "module.block1.layer.1.conv2.weight", "module.block1.layer.2.bn1.weight", "module.block1.layer.2.bn1.bias", "module.block1.layer.2.bn1.running_mean", "module.block1.layer.2.bn1.running_var", "module.block1.layer.2.bn1.num_batches_tracked", "module.block1.layer.2.conv1.weight", "module.block1.layer.2.bn2.weight", "module.block1.layer.2.bn2.bias", "module.block1.layer.2.bn2.running_mean", "module.block1.layer.2.bn2.running_var", "module.block1.layer.2.bn2.num_batches_tracked", "module.block1.layer.2.conv2.weight", "module.block1.layer.3.bn1.weight", "module.block1.layer.3.bn1.bias", "module.block1.layer.3.bn1.running_mean", "module.block1.layer.3.bn1.running_var", "module.block1.layer.3.bn1.num_batches_tracked", "module.block1.layer.3.conv1.weight", "module.block1.layer.3.bn2.weight", "module.block1.layer.3.bn2.bias", "module.block1.layer.3.bn2.running_mean", "module.block1.layer.3.bn2.running_var", "module.block1.layer.3.bn2.num_batches_tracked", "module.block1.layer.3.conv2.weight", "module.block1.layer.4.bn1.weight", "module.block1.layer.4.bn1.bias", "module.block1.layer.4.bn1.running_mean", "module.block1.layer.4.bn1.running_var", "module.block1.layer.4.bn1.num_batches_tracked", "module.block1.layer.4.conv1.weight", "module.block1.layer.4.bn2.weight", "module.block1.layer.4.bn2.bias", "module.block1.layer.4.bn2.running_mean", "module.block1.layer.4.bn2.running_var", "module.block1.layer.4.bn2.num_batches_tracked", "module.block1.layer.4.conv2.weight", "module.sub_block1.layer.0.bn1.weight", "module.sub_block1.layer.0.bn1.bias", "module.sub_block1.layer.0.bn1.running_mean", "module.sub_block1.layer.0.bn1.running_var", "module.sub_block1.layer.0.bn1.num_batches_tracked", "module.sub_block1.layer.0.conv1.weight", "module.sub_block1.layer.0.bn2.weight", "module.sub_block1.layer.0.bn2.bias", "module.sub_block1.layer.0.bn2.running_mean", "module.sub_block1.layer.0.bn2.running_var", "module.sub_block1.layer.0.bn2.num_batches_tracked", "module.sub_block1.layer.0.conv2.weight", "module.sub_block1.layer.0.convShortcut.weight", "module.sub_block1.layer.1.bn1.weight", "module.sub_block1.layer.1.bn1.bias", "module.sub_block1.layer.1.bn1.running_mean", "module.sub_block1.layer.1.bn1.running_var", "module.sub_block1.layer.1.bn1.num_batches_tracked", "module.sub_block1.layer.1.conv1.weight", "module.sub_block1.layer.1.bn2.weight", "module.sub_block1.layer.1.bn2.bias", "module.sub_block1.layer.1.bn2.running_mean", "module.sub_block1.layer.1.bn2.running_var", "module.sub_block1.layer.1.bn2.num_batches_tracked", "module.sub_block1.layer.1.conv2.weight", "module.sub_block1.layer.2.bn1.weight", "module.sub_block1.layer.2.bn1.bias", "module.sub_block1.layer.2.bn1.running_mean", "module.sub_block1.layer.2.bn1.running_var", "module.sub_block1.layer.2.bn1.num_batches_tracked", "module.sub_block1.layer.2.conv1.weight", "module.sub_block1.layer.2.bn2.weight", "module.sub_block1.layer.2.bn2.bias", "module.sub_block1.layer.2.bn2.running_mean", "module.sub_block1.layer.2.bn2.running_var", "module.sub_block1.layer.2.bn2.num_batches_tracked", "module.sub_block1.layer.2.conv2.weight", "module.sub_block1.layer.3.bn1.weight", "module.sub_block1.layer.3.bn1.bias", "module.sub_block1.layer.3.bn1.running_mean", "module.sub_block1.layer.3.bn1.running_var", "module.sub_block1.layer.3.bn1.num_batches_tracked", "module.sub_block1.layer.3.conv1.weight", "module.sub_block1.layer.3.bn2.weight", "module.sub_block1.layer.3.bn2.bias", "module.sub_block1.layer.3.bn2.running_mean", "module.sub_block1.layer.3.bn2.running_var", "module.sub_block1.layer.3.bn2.num_batches_tracked", "module.sub_block1.layer.3.conv2.weight", "module.sub_block1.layer.4.bn1.weight", "module.sub_block1.layer.4.bn1.bias", "module.sub_block1.layer.4.bn1.running_mean", "module.sub_block1.layer.4.bn1.running_var", "module.sub_block1.layer.4.bn1.num_batches_tracked", "module.sub_block1.layer.4.conv1.weight", "module.sub_block1.layer.4.bn2.weight", "module.sub_block1.layer.4.bn2.bias", "module.sub_block1.layer.4.bn2.running_mean", "module.sub_block1.layer.4.bn2.running_var", "module.sub_block1.layer.4.bn2.num_batches_tracked", "module.sub_block1.layer.4.conv2.weight", "module.block2.layer.0.bn1.weight", "module.block2.layer.0.bn1.bias", "module.block2.layer.0.bn1.running_mean", "module.block2.layer.0.bn1.running_var", "module.block2.layer.0.bn1.num_batches_tracked", "module.block2.layer.0.conv1.weight", "module.block2.layer.0.bn2.weight", "module.block2.layer.0.bn2.bias", "module.block2.layer.0.bn2.running_mean", "module.block2.layer.0.bn2.running_var", "module.block2.layer.0.bn2.num_batches_tracked", "module.block2.layer.0.conv2.weight", "module.block2.layer.0.convShortcut.weight", "module.block2.layer.1.bn1.weight", "module.block2.layer.1.bn1.bias", "module.block2.layer.1.bn1.running_mean", "module.block2.layer.1.bn1.running_var", "module.block2.layer.1.bn1.num_batches_tracked", "module.block2.layer.1.conv1.weight", "module.block2.layer.1.bn2.weight", "module.block2.layer.1.bn2.bias", "module.block2.layer.1.bn2.running_mean", "module.block2.layer.1.bn2.running_var", "module.block2.layer.1.bn2.num_batches_tracked", "module.block2.layer.1.conv2.weight", "module.block2.layer.2.bn1.weight", "module.block2.layer.2.bn1.bias", "module.block2.layer.2.bn1.running_mean", "module.block2.layer.2.bn1.running_var", "module.block2.layer.2.bn1.num_batches_tracked", "module.block2.layer.2.conv1.weight", "module.block2.layer.2.bn2.weight", "module.block2.layer.2.bn2.bias", "module.block2.layer.2.bn2.running_mean", "module.block2.layer.2.bn2.running_var", "module.block2.layer.2.bn2.num_batches_tracked", "module.block2.layer.2.conv2.weight", "module.block2.layer.3.bn1.weight", "module.block2.layer.3.bn1.bias", "module.block2.layer.3.bn1.running_mean", "module.block2.layer.3.bn1.running_var", "module.block2.layer.3.bn1.num_batches_tracked", "module.block2.layer.3.conv1.weight", "module.block2.layer.3.bn2.weight", "module.block2.layer.3.bn2.bias", "module.block2.layer.3.bn2.running_mean", "module.block2.layer.3.bn2.running_var", "module.block2.layer.3.bn2.num_batches_tracked", "module.block2.layer.3.conv2.weight", "module.block2.layer.4.bn1.weight", "module.block2.layer.4.bn1.bias", "module.block2.layer.4.bn1.running_mean", "module.block2.layer.4.bn1.running_var", "module.block2.layer.4.bn1.num_batches_tracked", "module.block2.layer.4.conv1.weight", "module.block2.layer.4.bn2.weight", "module.block2.layer.4.bn2.bias", "module.block2.layer.4.bn2.running_mean", "module.block2.layer.4.bn2.running_var", "module.block2.layer.4.bn2.num_batches_tracked", "module.block2.layer.4.conv2.weight", "module.block3.layer.0.bn1.weight", "module.block3.layer.0.bn1.bias", "module.block3.layer.0.bn1.running_mean", "module.block3.layer.0.bn1.running_var", "module.block3.layer.0.bn1.num_batches_tracked", "module.block3.layer.0.conv1.weight", "module.block3.layer.0.bn2.weight", "module.block3.layer.0.bn2.bias", "module.block3.layer.0.bn2.running_mean", "module.block3.layer.0.bn2.running_var", "module.block3.layer.0.bn2.num_batches_tracked", "module.block3.layer.0.conv2.weight", "module.block3.layer.0.convShortcut.weight", "module.block3.layer.1.bn1.weight", "module.block3.layer.1.bn1.bias", "module.block3.layer.1.bn1.running_mean", "module.block3.layer.1.bn1.running_var", "module.block3.layer.1.bn1.num_batches_tracked", "module.block3.layer.1.conv1.weight", "module.block3.layer.1.bn2.weight", "module.block3.layer.1.bn2.bias", "module.block3.layer.1.bn2.running_mean", "module.block3.layer.1.bn2.running_var", "module.block3.layer.1.bn2.num_batches_tracked", "module.block3.layer.1.conv2.weight", "module.block3.layer.2.bn1.weight", "module.block3.layer.2.bn1.bias", "module.block3.layer.2.bn1.running_mean", "module.block3.layer.2.bn1.running_var", "module.block3.layer.2.bn1.num_batches_tracked", "module.block3.layer.2.conv1.weight", "module.block3.layer.2.bn2.weight", "module.block3.layer.2.bn2.bias", "module.block3.layer.2.bn2.running_mean", "module.block3.layer.2.bn2.running_var", "module.block3.layer.2.bn2.num_batches_tracked", "module.block3.layer.2.conv2.weight", "module.block3.layer.3.bn1.weight", "module.block3.layer.3.bn1.bias", "module.block3.layer.3.bn1.running_mean", "module.block3.layer.3.bn1.running_var", "module.block3.layer.3.bn1.num_batches_tracked", "module.block3.layer.3.conv1.weight", "module.block3.layer.3.bn2.weight", "module.block3.layer.3.bn2.bias", "module.block3.layer.3.bn2.running_mean", "module.block3.layer.3.bn2.running_var", "module.block3.layer.3.bn2.num_batches_tracked", "module.block3.layer.3.conv2.weight", "module.block3.layer.4.bn1.weight", "module.block3.layer.4.bn1.bias", "module.block3.layer.4.bn1.running_mean", "module.block3.layer.4.bn1.running_var", "module.block3.layer.4.bn1.num_batches_tracked", "module.block3.layer.4.conv1.weight", "module.block3.layer.4.bn2.weight", "module.block3.layer.4.bn2.bias", "module.block3.layer.4.bn2.running_mean", "module.block3.layer.4.bn2.running_var", "module.block3.layer.4.bn2.num_batches_tracked", "module.block3.layer.4.conv2.weight", "module.bn1.weight", "module.bn1.bias", "module.bn1.running_mean", "module.bn1.running_var", "module.bn1.num_batches_tracked", "module.fc.weight", "module.fc.bias". 

In [None]:
#eval_adv_test_whitebox(model_SATInf, DEVICE, val_loader)            


In [21]:

dataentropy(batchentropy,model_SATInf, DEVICE, train_loader, adversary=entropySmoothing,L=10,step=0.007,eps=0.031,norm='inf',random=False)

 

RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cpu