Evaluate the models

# Stochastic Adversarial Training (StochAT)

### SoTA

vanilla SGD: 
MNIST - 99%+ (most cnns), CIFAR10 - 93%+ (resnet18), 96%+ (wideresnet) 

MNIST:

adversarial attacks: 
l-inf @ eps = 80/255 @20 steps: TRADES - 96.07% - (4 layer cnn), MART 96.4%, MMA 95.5%, PGD - 96.01% - (4 layer cnn)

adversarial attacks:
l-2 @ eps = 32/255 (check): TRADES, MMA, PGD

CIFAR10:

adversarial attacks: 
l-inf @ eps = 8/255 @20 steps: 
TRADES 53-56% - (WRN-34-10), MART 57-58% (WRN-34-10), MMA 47%, PGD 48% - (WRN-32-10)// 49% - (WRN-34-10), Std - 0.03%
https://openreview.net/pdf?id=rklOg6EFwS (Table 4)

adversarial attacks: 
l-inf @ eps = 8/255 @20 steps: 
[ResNet10] TRADES 45.4%, MART 46.6%, MMA 37.26%, PGD 42.27%, Std 0.14%

Benign accuracies: TRADES 84.92%, MART 83.62%, MMA 84.36%, PGD 87.14%, Std 95.8% [wideresnet]
https://openreview.net/pdf?id=Ms9zjhVB5R (Table 1)

adversarial attacks:
l-2 @ eps = 32/255 (check): TRADES, MART, MMA, PGD

TBD: CWinf attacks

## Pretrained models for comparison

Download the pretrained models and place them in ../trainedmodels/MNIST or ../trainedmodels/CIFAR10, respectively.

### TRADES :
https://github.com/yaodongyu/TRADES (MNIST: small cnn, CIFAR10: WideResNet34)
### MMA : 
https://github.com/BorealisAI/mma_training (MNIST: lenet5, CIFAR10: WideResNet28)
### MART :
 https://github.com/YisenWang/MART (CIFAR10: ResNet18 and WideResNet34)

## IMPORT LIBRARIES

In [1]:
import numpy as np
import pandas as pd
from torch import nn, optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision import transforms, datasets
from multiprocessing import cpu_count
from collections import OrderedDict
import matplotlib.pyplot as plt
import torch
import olympic
from typing import Union, Callable, Tuple
import sys
sys.path.append('../adversarial/')
sys.path.append('../architectures/')
from functional import boundary, iterated_fgsm, local_search, pgd, entropySmoothing
from ESGD_utils import *
import pickle
import time
import torch.backends.cudnn as cudnn
import argparse, math, random
import ESGD_optim
from trades import trades_loss
import advertorch
from torch.autograd import Variable
from utils import project

In [2]:
# Use the GPU when PyTorch can see one; otherwise run everything on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
DEVICE

'cuda'

# LOAD DATA

In [4]:
#place data folders outside working directory

In [5]:
# Extra keyword arguments forwarded to every DataLoader built in this notebook.
kwargs = dict(num_workers=4, pin_memory=True)

In [6]:
# Hyper-parameters shared by the training and evaluation cells below.
args = {
    'test_batch_size': 128,
    'no_cuda': False,
    'epsilon': 0.435,           # perturbation budget passed to the attacks
    'num_steps': 10,            # attack iterations
    'step_size': 0.01,          # alternative: 2.5*args['epsilon']/args['num_steps']
    # Was `True,` (a one-element tuple) because of a stray trailing comma.
    'random': True,             # start the evaluation attack from a random point
    'white_box_attack': True,
    'log_interval': 100,
    'beta': 1,                  # forwarded to trades_loss as `beta`
    'seed': 2,
    'lr': 0.1,
    'momentum': 0.9,
    'batch_size': 128,
    'attack': 'l_2',            # only used when building the saved model's file name
    'norm': 2,                  # 2 or 'inf'; selects the attack branch
    'epochs': 30,
}

In [7]:
dataset = 'CIFAR10'  # one of [MNIST, CIFAR10]; only the CIFAR10 pipeline is built in this cell

# Training images: random 32x32 crop (4 px padding) + random horizontal flip,
# then conversion to a tensor.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)
# Validation images are only converted to tensors (no augmentation).
transform_test = transforms.Compose([transforms.ToTensor()])

# The data folder lives outside the working directory; it is downloaded on first use.
train = datasets.CIFAR10(
    root='../../data/CIFAR10', train=True, transform=transform_train, download=True
)
val = datasets.CIFAR10(
    root='../../data/CIFAR10', train=False, transform=transform_test, download=True
)

# Shuffle only the training split.
train_loader = DataLoader(
    dataset=train, batch_size=args['batch_size'], shuffle=True, **kwargs
)
val_loader = DataLoader(
    dataset=val, batch_size=args['test_batch_size'], shuffle=False, **kwargs
)

Files already downloaded and verified
Files already downloaded and verified


In [25]:
transform_train

Compose(
    RandomCrop(size=(32, 32), padding=4)
    RandomHorizontalFlip(p=0.5)
    ToTensor()
)

# INITIALIZE NETWORK

In [8]:
if dataset == 'CIFAR10':
    # Architectures available for CIFAR10: ResNet18, ResNet34, ResNet50, WideResNet.
    from resnet import ResNet18, ResNet34, ResNet50
    from wideresnet import WideResNet

    # Constructor used by the training cells, and the label used in saved file names.
    NetName = 'ResNet18'
    Net = ResNet18

In [9]:
Net

<function resnet.ResNet18()>

# RANDOM SEED 

In [10]:
# Seed every random number generator used by the notebook from args['seed'].
seed = args['seed']
torch.set_num_threads(2)

random.seed(seed)
np.random.seed(seed)

if DEVICE == 'cuda':
    torch.cuda.set_device(-1)
    torch.cuda.manual_seed(seed)
    # NOTE(review): cudnn.benchmark lets cuDNN auto-tune its kernels; this may
    # make runs non-reproducible despite the seeding - confirm this is intended.
    cudnn.benchmark = True

torch.manual_seed(seed)

<torch._C.Generator at 0x7f8f50627110>

In [11]:
def eval_train(model, device, train_loader):
    """Evaluate ``model`` on ``train_loader`` (no attack) and print a summary.

    Args:
        model: classifier called as ``model(data)``; it is switched to eval mode.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches exposing ``.dataset``.

    Returns:
        ``(train_loss, training_accuracy)``: the cross-entropy summed over all
        samples divided by ``len(train_loader.dataset)``, and the fraction of
        correctly classified samples (a value in [0, 1]).
    """
    model.eval()
    train_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # reduction='sum' is the supported spelling of the deprecated
            # size_average=False: per-sample losses are summed, then averaged
            # over the whole dataset below.
            train_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    num_samples = len(train_loader.dataset)
    train_loss /= num_samples
    print('Training: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        train_loss, correct, num_samples,
        100. * correct / num_samples))
    training_accuracy = correct / num_samples
    return train_loss, training_accuracy

In [12]:
def eval_test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` (no attack) and print a summary.

    Args:
        model: classifier called as ``model(data)``; it is switched to eval mode.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches exposing ``.dataset``.

    Returns:
        ``(test_loss, test_accuracy)``: the cross-entropy summed over all
        samples divided by ``len(test_loader.dataset)``, and the fraction of
        correctly classified samples (a value in [0, 1]).
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # reduction='sum' is the supported spelling of the deprecated
            # size_average=False: per-sample losses are summed, then averaged
            # over the whole dataset below.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    print('Test: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))
    test_accuracy = correct / num_samples
    return test_loss, test_accuracy

In [13]:
def _pgd_whitebox(model,
                  X,
                  y,
                  epsilon=args['epsilon'],
                  norm=args['norm'],
                  num_steps=args['num_steps'],
                  step_size=args['step_size']):
    """Attack one batch with PGD and report clean / adversarial errors.

    The keyword defaults are read from ``args`` once, when this function is
    defined; later changes to ``args`` do not affect them. ``args['random']``
    is read on every call.

    Args:
        model: classifier called as ``model(X)``. Its mode is not changed here.
        X: input batch. The L2 branch reshapes the gradient norm to
            ``(-1, 1, 1, 1)``, so 4-D inputs are assumed there - TODO confirm
            layout with the caller. Results are clamped to [0, 1].
        y: labels for ``X``.
        epsilon: perturbation budget (clamp radius for ``'inf'``, passed to
            ``project`` for ``2``).
        norm: ``'inf'`` or ``2``. Any other value skips the gradient step, so
            only the optional random start and the [0, 1] clamp are applied.
        num_steps: number of attack iterations.
        step_size: step length per iteration.

    Returns:
        ``(err, err_pgd, loss_pgd)``: number of misclassified clean samples
        (0-dim tensor), number of misclassified adversarial samples (0-dim
        tensor), and the mean cross-entropy on the adversarial batch (float).
    """
    x_nat = X.detach()

    with torch.no_grad():
        err = (model(x_nat).max(1)[1] != y).float().sum()

    x_adv = x_nat.clone()
    if args['random']:
        # Noise is still drawn on the CPU, but is moved to the input's own
        # device instead of the global DEVICE.
        random_noise = torch.FloatTensor(*x_adv.shape).uniform_(-epsilon, epsilon).to(x_adv.device)
        x_adv = x_adv + random_noise

    for _ in range(num_steps):
        x_adv = x_adv.detach().requires_grad_(True)
        with torch.enable_grad():
            loss = nn.CrossEntropyLoss()(model(x_adv), y)
        # Differentiate w.r.t. the input only, so the model's parameter
        # gradients are not modified by the evaluation attack.
        grad, = torch.autograd.grad(loss, x_adv)
        x_adv = x_adv.detach()

        if norm == 'inf':
            x_adv = x_adv + step_size * grad.sign()
            eta = torch.clamp(x_adv - x_nat, -epsilon, epsilon)
            x_adv = x_nat + eta
        elif norm == 2:
            # Guard the normalisation: a sample with an all-zero gradient
            # would otherwise yield 0/0 = NaN and poison the whole example.
            grad_norm = grad.reshape(grad.shape[0], -1).norm(2, dim=-1).clamp_min(1e-12)
            x_adv = x_adv + step_size * grad / grad_norm.view(-1, 1, 1, 1)
            x_adv = project(x_nat, x_adv, norm, epsilon)
        x_adv = torch.clamp(x_adv, 0, 1.0).detach()

    # Single forward pass for both the error count and the loss.
    with torch.no_grad():
        out_adv = model(x_adv)
        err_pgd = (out_adv.max(1)[1] != y).float().sum()
        loss_pgd = nn.CrossEntropyLoss()(out_adv, y)
    return err, err_pgd, loss_pgd.item()

def eval_adv_test_whitebox(model, device, test_loader):
    """Measure clean and white-box PGD accuracy of ``model`` on ``test_loader``.

    Every batch is attacked with ``_pgd_whitebox`` using its default settings.
    Prints the robust loss averaged over batches, the clean accuracy and the
    robust accuracy (both in percent of ``len(test_loader.dataset)``).

    Returns:
        The robust accuracy in percent.
    """
    model.eval()
    natural_err_total = 0
    robust_err_total = 0
    lossrob = 0
    for batch, labels in test_loader:
        batch = batch.to(device)
        labels = labels.to(device)
        # run the PGD attack on this batch
        X = Variable(batch, requires_grad=True)
        y = Variable(labels)
        err_natural, err_robust, losspgd = _pgd_whitebox(model, X, y)
        natural_err_total += err_natural
        robust_err_total += err_robust
        lossrob += losspgd

    n_samples = len(test_loader.dataset)
    rob = 100-100*robust_err_total.item()/n_samples
    # losspgd is a per-batch mean, so it is averaged over the number of batches
    lossrob /= len(test_loader)
    print('robust test loss:',lossrob)
    print('natural_acc_total: ', 100-100*natural_err_total.item()/n_samples)
    print('robust_acc_total: ', rob)
    return rob

In [14]:
# `os` is not among the notebook's explicit imports, so import it here rather
# than relying on it leaking in through a star import.
import os

# exist_ok=True creates the folder if needed and is a no-op when it is
# already there, without a separate existence check.
os.makedirs('../trainedmodels', exist_ok=True)

In [15]:
def infnorm(x):
    """Return the largest absolute entry of ``x`` as a 0-dim CPU tensor."""
    return x.detach().cpu().abs().max()

In [16]:
def train_adversarial(method,model, device, train_loader, optimizer, epoch,adversary,L,step,eps,norm):
    """Run one epoch of adversarial training and print summary statistics.

    Args:
        method: per-batch training routine, called as
            ``method(model, optimizer, loss_fn, data, target, epoch,
            adversary, L, step, eps, norm)`` and expected to return
            ``(loss, correct)``.
        model: network being trained (forwarded to ``method``).
        device: device the batches are moved to.
        train_loader: iterable of ``(data, target)`` batches exposing
            ``.dataset`` and ``len()``.
        optimizer, epoch, adversary, L, step, eps, norm: forwarded unchanged
            to ``method``.

    Prints the percentage of correct predictions over the dataset and the
    loss averaged over batches. Returns ``None``.
    """
    totalcorrect = 0
    totalloss = 0.0
    batches = len(train_loader)
    # The loss module holds no state, so one instance serves every batch.
    sgd_loss = nn.CrossEntropyLoss()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # calculate robust loss per batch
        loss, correct = method(model,optimizer,sgd_loss,data,target,epoch,adversary,L,step,eps,norm)
        totalcorrect += correct
        # Accumulate a plain float: summing the returned tensors would chain
        # their autograd history across the whole epoch. Plain numbers are
        # accepted as well.
        totalloss += loss.detach().item() if torch.is_tensor(loss) else float(loss)
    totalloss /= batches
    print('robust train accuracy:',100*totalcorrect/len(train_loader.dataset),'rob train loss:',totalloss)

# TRAIN MODEL USING SAT

In [17]:
def adversarial_training(model, optimiser, loss_fn, x, y, epoch, adversary, k, step, eps, norm):
    """Take one optimiser step on adversarially perturbed inputs.

    ``adversary`` is called with a random start to craft the perturbed batch;
    the model is then trained on that batch with ``loss_fn``. ``epoch`` is
    accepted for interface compatibility and is not used.

    Returns:
        ``(loss, correct)``: the loss tensor on the perturbed batch and the
        number of perturbed samples classified correctly (int).
    """
    model.train()

    # Craft the adversarial batch
    perturbed = adversary(model, x, y, loss_fn, k=k, step=step, eps=eps, norm=norm, random=True)

    optimiser.zero_grad()
    logits = model(perturbed)
    loss = loss_fn(logits, y)

    predicted = logits.max(1, keepdim=True)[1]
    correct = predicted.eq(y.view_as(predicted)).sum().item()

    loss.backward()
    optimiser.step()
    return loss, correct

In [18]:
def adversarial_training_entropy(model, optimiser, loss_fn, x, y, epoch, adversary, L, step, eps, norm):
    """Take one optimiser step using up to ``L`` chained adversary iterations.

    The adversary is called once per iteration. The first call starts from a
    random perturbation (``random=True``, ``xp=None``); later calls start from
    the previous adversarial batch. On the last iteration (when it is not
    also the first) ``projector=True`` is requested for ``norm == 'inf'``
    only. The per-iteration losses are blended as
    ``loss = (1 - alpha) * loss + alpha * loss_l`` with ``alpha = 0.95``, and
    a single backward pass / optimiser step is taken on the blended loss.

    Args:
        model, optimiser, loss_fn: network, its optimiser and the loss.
        x, y: clean inputs and labels.
        epoch: accepted for interface compatibility; not used.
        adversary: callable returning ``(x_adv, stop_flag)``; a truthy
            ``stop_flag`` ends the iterations early.
        L: number of adversary iterations (must be >= 1).
        step, eps: forwarded to ``adversary``.
        norm: ``2`` (uses ``gamma = 5e-2``) or ``'inf'`` (uses ``gamma = 0``).

    Returns:
        ``(loss, correct)``: the blended loss tensor and the number of
        correct predictions on the last adversarial batch evaluated.

    Raises:
        ValueError: if ``L < 1`` or ``norm`` is neither ``2`` nor ``'inf'``.
    """
    # Fail early with a clear message instead of a NameError on `gamma` or an
    # AttributeError on `int.backward` further down.
    if L < 1:
        raise ValueError('L must be at least 1, got {!r}'.format(L))
    if norm == 2:
        gamma = 5e-2
    elif norm == 'inf':
        gamma = 0
    else:
        raise ValueError("norm must be 2 or 'inf', got {!r}".format(norm))

    model.train()
    alpha = 0.95
    loss = 0
    x_adv = None

    for l in range(L):
        is_first = l == 0
        is_last = l == L - 1
        # Projection is requested only on the final iteration of an 'inf'
        # attack, and never on the very first iteration.
        projector = (not is_first) and is_last and norm == 'inf'

        x_adv, bfl = adversary(model, x, y, loss_fn,
                               xp=None if is_first else x_adv,
                               step=step, eps=eps, norm=norm,
                               random=is_first, ep=1e-3,
                               projector=projector, gamma=gamma, debug=False)
        optimiser.zero_grad()
        y_pred = model(x_adv)
        pred = y_pred.max(1, keepdim=True)[1]
        correct = pred.eq(y.view_as(pred)).sum().item()

        loss_lg = loss_fn(y_pred, y)
        # Exponential blend that weights the most recent iteration by alpha.
        loss = (1-alpha)*loss + alpha*loss_lg

        if bfl:
            break

    loss.backward()
    optimiser.step()
    return loss, correct

In [19]:
# Folder that receives the per-epoch checkpoints written by the training loop.
model_dir = "../ResNet18_l2"

In [20]:
# exist_ok=True creates the checkpoint folder if needed and is a no-op when
# it is already there, without a separate existence check.
os.makedirs(model_dir, exist_ok=True)

In [21]:
def adjust_learning_rate(optimizer, epoch,lr_init):
    """Set every parameter group's learning rate for the given epoch.

    The rate is ``lr_init`` before epoch 15 and ``lr_init * 0.2`` from
    epoch 15 onwards.
    """
    lr = lr_init * 0.2 if epoch >= 15 else lr_init
    for group in optimizer.param_groups:
        group['lr'] = lr

In [22]:
## Train a fresh network with the entropy-smoothing adversary
model_ATENT = Net().to(DEVICE)

## training params
lr_init = args['lr']
epochs = args['epochs']
# SGD with momentum is used (Adam is an alternative:
# optim.Adam(model_ATENT.parameters(), lr=lr_init)).
optimizer = optim.SGD(model_ATENT.parameters(), lr=lr_init, momentum=0.9)

## train model
for epoch in range(1, epochs+1):
    print('Epoch:',epoch)

    # The step-wise schedule adjust_learning_rate(optimizer, epoch, lr_init)
    # is not applied in this run.

    # one epoch of adversarial training; the adversary step is 3x args['step_size']
    train_adversarial(
        adversarial_training_entropy,
        model_ATENT,
        DEVICE,
        train_loader,
        optimizer,
        epoch,
        adversary=entropySmoothing,
        L=args['num_steps'],
        step=3*args['step_size'],
        eps=args['epsilon'],
        norm=args['norm'],
    )

    # clean accuracy on the training set, then PGD robustness on the validation set
    print('================================================================')
    eval_train(model_ATENT, DEVICE, train_loader)
    rob = eval_adv_test_whitebox(model_ATENT, DEVICE, val_loader)
    print('================================================================')

    # checkpoint named after the epoch and the rounded robust accuracy
    checkpoint_name = 'model-nn-epoch{}-robacc{}.pt'.format(epoch,int(np.round(rob)))
    torch.save(model_ATENT.state_dict(), os.path.join(model_dir, checkpoint_name))

## save model

#modelname = '../trainedmodels/'+dataset+'/'+NetName+'_ATENT_'+args['attack']+'_ep'+str(epochs)+'_lr'+str(lr_init)+'.pt'
#torch.save(model_ATENT,modelname)

Epoch: 1
robust train accuracy: 23.226 rob train loss: 2.0857880115509033




Training: Average loss: 1.6343, Accuracy: 19179/50000 (38%)
robust test loss: 1.6494449829753441
natural_acc_total:  39.62
robust_acc_total:  37.45
Epoch: 2
robust train accuracy: 35.524 rob train loss: 1.7039663791656494
Training: Average loss: 1.4590, Accuracy: 23287/50000 (47%)
robust test loss: 1.4800871067409274
natural_acc_total:  47.89
robust_acc_total:  44.86
Epoch: 3
robust train accuracy: 42.162 rob train loss: 1.5357625484466553
Training: Average loss: 1.2609, Accuracy: 26712/50000 (53%)
robust test loss: 1.3342547160160692
natural_acc_total:  53.74
robust_acc_total:  50.92
Epoch: 4
robust train accuracy: 48.084 rob train loss: 1.3873941898345947
Training: Average loss: 1.1316, Accuracy: 29912/50000 (60%)
robust test loss: 1.2169334903548035
natural_acc_total:  60.19
robust_acc_total:  56.08
Epoch: 5
robust train accuracy: 52.892 rob train loss: 1.264775276184082
Training: Average loss: 0.9867, Accuracy: 32400/50000 (65%)
robust test loss: 1.0651792318006106
natural_acc_tota

KeyboardInterrupt: 

In [23]:
# Re-evaluate the current model_ATENT weights: unattacked loss/accuracy on the
# training set, then white-box PGD robustness on the validation set.
eval_train(model_ATENT, DEVICE, train_loader)
rob = eval_adv_test_whitebox(model_ATENT, DEVICE, val_loader)   

Training: Average loss: 0.4549, Accuracy: 42770/50000 (86%)
robust test loss: 0.6056278585633145
natural_acc_total:  82.71000000000001
robust_acc_total:  78.83


In [None]:
## Train a fresh network with standard PGD adversarial training
model_PGD = Net().to(DEVICE)

## training params
lr_init = args['lr']
epochs = args['epochs']
# SGD with momentum is used (Adam is an alternative:
# optim.Adam(model_PGD.parameters(), lr=lr_init)).
optimizer = optim.SGD(model_PGD.parameters(), lr=lr_init, momentum=0.9)

## train model
for epoch in range(1, epochs+1):
    print('Epoch:',epoch)

    # The step-wise schedule adjust_learning_rate(optimizer, epoch, lr_init)
    # is not applied in this run.

    # one epoch of adversarial training; the attack step is 2x args['step_size']
    train_adversarial(
        adversarial_training,
        model_PGD,
        DEVICE,
        train_loader,
        optimizer,
        epoch,
        adversary=pgd,
        L=args['num_steps'],
        step=2*args['step_size'],
        eps=args['epsilon'],
        norm=args['norm'],
    )

    # clean accuracy on both splits, then PGD robustness on the validation set
    print('================================================================')
    eval_train(model_PGD, DEVICE, train_loader)
    eval_test(model_PGD, DEVICE, val_loader)

    rob = eval_adv_test_whitebox(model_PGD, DEVICE, val_loader)
    print('================================================================')


In [None]:
def train_trades(args, model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch with the TRADES loss.

    Args:
        args: mapping providing ``'step_size'``, ``'epsilon'``,
            ``'num_steps'`` and ``'beta'`` for ``trades_loss``.
        model: network to train (switched to train mode).
        device: device the batches are moved to.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimiser stepped once per batch.
        epoch: accepted for interface compatibility; not used.

    NOTE(review): no norm/distance argument is passed to ``trades_loss``, so
    it uses whatever default it defines - confirm this matches the norm used
    for evaluation.
    """
    model.train()
    # Attack settings are the same for every batch, so read them once.
    attack_settings = dict(
        step_size=args['step_size'],
        epsilon=args['epsilon'],
        perturb_steps=args['num_steps'],
        beta=args['beta'],
    )
    for data, target in train_loader:
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()

        # robust loss for this batch
        loss = trades_loss(model=model, x_natural=data, y=target,
                           optimizer=optimizer, **attack_settings)

        loss.backward()
        optimizer.step()

## Train a fresh network with TRADES
model_TRADES = Net().to(DEVICE)

## training params
epochs = args['epochs']
lr_init = args['lr']
optimizer = optim.SGD(model_TRADES.parameters(), lr=lr_init, momentum=0.9)

## train model
for epoch in range(1, epochs + 1):
    print('Epoch:',epoch)

    # step-wise learning-rate schedule for SGD
    adjust_learning_rate(optimizer, epoch, lr_init=lr_init)

    # one epoch of TRADES training
    train_trades(args, model_TRADES, DEVICE, train_loader, optimizer, epoch)

    # evaluate every epoch (the modulus can be raised to evaluate less often)
    if epoch % 1 == 0:
        print('================================================================')
        eval_train(model_TRADES, DEVICE, train_loader)
        eval_adv_test_whitebox(model_TRADES, DEVICE, val_loader)
        print('================================================================')

## file name for the final model: dataset / architecture / attack / epochs / learning rate
modelname = ''.join([
    '../trainedmodels/', dataset, '/', NetName,
    '_TRADES_', args['attack'],
    '_ep', str(epochs),
    '_lr', str(lr_init),
    '.pt',
])
#torch.save(model_TRADES,modelname)