In [91]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescentPyTorch, CarliniLInfMethod, CarliniL2Method, AutoAttack
from art.estimators.classification import PyTorchClassifier
import sys
sys.path.append('../architectures/')
sys.path.append('../adversarial/')
import small_cnn
from small_cnn import SmallCNN

In [92]:
from advertorch_examples.models import LeNet5Madry
Net = LeNet5Madry
NetName = 'LeNet5Madry'

In [93]:
model = Net()

In [94]:
path = '../trainedmodels/model_best.pt'

In [96]:
model.load_state_dict(torch.load(path)['model'])

<All keys matched successfully>

In [2]:
# sd = torch.load('BEST_model-nn-epoch74-robacc57.pt')
# model = WideResNet(depth=34)
# model = nn.DataParallel(model)
# model.load_state_dict(sd)
# torch.save(model.module.state_dict(), 'best_atent_model_74.pt')

In [8]:
device = torch.device('cuda')

In [84]:
model = SmallCNN().to(device)
#model.load_state_dict(torch.load('MNIST_models/LINF003BEST_model-nn-epoch23-robacc96.pt'))
#model.load_state_dict(torch.load('MNIST_models/model_mnist_smallcnn_trades.pt'))
#enum=49 #41 48 #49
path = '../trainedmodels/MNIST/SmallCNN_ESGD_ep20_lr0.01.pt'
#model.load_state_dict(torch.load(path))
#model.eval()

In [85]:
model = torch.load(path).to(device)

In [37]:
loss = nn.CrossEntropyLoss(reduction='mean')
optim = torch.optim.SGD(params = model.parameters(), lr=0.01)
model = model.to(device)
pytorch_clf = PyTorchClassifier(model, loss=loss, optimizer=optim, input_shape=(1,32,32), nb_classes=10, device_type='gpu', clip_values=(0,1), preprocessing=None)

In [38]:
ds = MNIST('../../data/', download=True, train=False, transform=Compose([ToTensor()]))
dl = DataLoader(ds, batch_size=128)

In [39]:
correct = 0.0
correct_2 = 0.0
for x, y in dl:
    x = x.to(device)
    #y = y.to(device)
    #preds = torch.argmax(torch.tensor(pytorch_clf.predict(x.cpu().detach())), dim=-1)
    #correct+=preds.eq(y.view_as(preds)).sum().item()
    preds2 = torch.argmax(torch.tensor(model(x)), axis=-1).cpu()
    correct_2+=preds2.eq(y.view_as(preds2)).sum().item()
    #correct += (preds==y).sum()
#print(correct/10000.0)
print(correct_2/10000.0)

  preds2 = torch.argmax(torch.tensor(model(x)), axis=-1).cpu()


0.9924


In [89]:
#attack = ProjectedGradientDescentPyTorch(pytorch_clf, norm=np.inf, eps=0.3, eps_step=0.01, num_random_init=10, max_iter=40, batch_size=128, verbose=False)
attack = CarliniLInfMethod(pytorch_clf, learning_rate=0.3, max_iter=40,  eps=0.3, batch_size=128,verbose=False)

In [90]:
correct = 0.0
correct_2 = 0.0
epsilons = []
for x, y in dl:
    x = x.to(device)
    #y = y.to(device)
    preds = torch.argmax(torch.tensor(pytorch_clf.predict(x.cpu())), dim=-1)
    x_adv = attack.generate(x.cpu())
    preds2 = torch.argmax(torch.tensor(pytorch_clf.predict(x_adv)), dim=-1)
    tmp_eps = torch.norm(torch.tensor(x_adv).view(-1, 28*28*1).cpu() - x.view(-1, 28*28*1).cpu(), dim=1, p=np.inf)
    epsilons.extend(tmp_eps.tolist())
    correct+=preds.eq(y.view_as(preds)).sum().item()
    correct_2 += preds2.eq(y.view_as(preds)).sum().item()
    #correct += (preds==y).sum()
print(correct/10000.0)
print(correct_2/10000.0)

0.9924
0.3465


In [64]:
from utils import project

In [76]:
args = {}
args['random'] = True

In [79]:
def _pgd_whitebox(model,
                  X,
                  y,
                  epsilon=2,
                  norm=2,
                  num_steps=40,
                  step_size=0.25):
    out = model(X)
    err = (out.data.max(1)[1] != y.data).float().sum()
    X_pgd = Variable(X.data, requires_grad=True)
    if args['random']:
        random_noise = torch.FloatTensor(*X_pgd.shape).uniform_(-epsilon, epsilon).to(device)
        X_pgd = Variable(X_pgd.data + random_noise, requires_grad=True)

    for _ in range(num_steps):
        opt = optim.SGD([X_pgd], lr=1e-3)
        opt.zero_grad()

        with torch.enable_grad():
            loss = nn.CrossEntropyLoss()(model(X_pgd), y)
        loss.backward()
        if norm=='inf':
            eta = step_size * X_pgd.grad.data.sign()
            X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
            eta = torch.clamp(X_pgd.data - X.data, -epsilon, epsilon)
            X_pgd = Variable(X.data + eta, requires_grad=True)
        elif norm==2:
            #print('l2 attack')
            eta = step_size * X_pgd.grad.data / X_pgd.grad.view(X_pgd.shape[0], -1).norm(2, dim=-1)\
                    .view(-1, 1, 1, 1)
            X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
            X_pgd = project(X, X_pgd, norm, epsilon)            
        X_pgd = Variable(torch.clamp(X_pgd, 0, 1.0), requires_grad=True)
        #print('distance of attack:',torch.norm(X_pgd-X)/np.sqrt(128))
    err_pgd = (model(X_pgd).data.max(1)[1] != y.data).float().sum()
    
    with torch.no_grad():
        loss_pgd = nn.CrossEntropyLoss()(model(X_pgd), y)
    #print('err pgd (white-box): ', err_pgd)
    return err, err_pgd, loss_pgd.item()

def eval_adv_test_whitebox(model, device, test_loader):
    """
    evaluate model by white-box attack
    """
    model.eval()
    robust_err_total = 0
    natural_err_total = 0
    lossrob  = 0
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        # pgd attack
        X, y = Variable(data, requires_grad=True), Variable(target)
        err_natural, err_robust, losspgd = _pgd_whitebox(model, X, y)
        robust_err_total += err_robust
        natural_err_total += err_natural
        lossrob = lossrob + losspgd
    rob = 100-100*robust_err_total.item()/len(test_loader.dataset)   
    lossrob /= len(test_loader)
    print('robust test loss:',lossrob)
    print('natural_acc_total: ', 100-100*natural_err_total.item()/len(test_loader.dataset))
    print('robust_acc_total: ', rob)
    return rob

In [73]:
from torch.autograd import Variable

In [82]:
from torch import optim

In [86]:
rob = eval_adv_test_whitebox(model,'cuda', dl)            

robust test loss: 8.217267715478245
natural_acc_total:  99.24
robust_acc_total:  19.120000000000005


In [62]:
#attack = ProjectedGradientDescentPyTorch(pytorch_clf, norm=np.inf, eps=0.3, eps_step=0.01, num_random_init=10, max_iter=40, batch_size=128)
# attack = CarliniLInfMethod(pytorch_clf, learning_rate=0.01, max_iter=40,  eps=0.3, batch_size=128, verbose=False)
attack = ProjectedGradientDescentPyTorch(pytorch_clf, norm=2, eps=2, eps_step=4, num_random_init=1, max_iter=40, batch_size=128,verbose=False)
#attack = CarliniL2Method(pytorch_clf, learning_rate=0.01, max_iter=40,  eps=2, batch_size=128, verbose=False)

In [63]:
correct = 0.0
correct_2 = 0.0
epsilons = []
for x, y in dl:
    x = x.to(device)
    #y = y.to(device)
    preds = torch.argmax(torch.tensor(pytorch_clf.predict(x.cpu())), dim=-1)
    x_adv = attack.generate(x.cpu())
    preds2 = torch.argmax(torch.tensor(pytorch_clf.predict(x_adv)), dim=-1)
    tmp_eps = torch.norm(torch.tensor(x_adv).view(-1, 28*28*1).cpu() - x.view(-1, 28*28*1).cpu(), dim=1, p=np.inf)
    epsilons.extend(tmp_eps.tolist())
    correct+=preds.eq(y.view_as(preds)).sum().item()
    correct_2 += preds2.eq(y.view_as(preds)).sum().item()
    #correct += (preds==y).sum()
print(correct/10000.0)
print(correct_2/10000.0)

0.9924
0.4204


In [30]:
np.save('pgd_inf_mnist_atent', epsilons)

In [16]:
from foolbox.attacks import LinfDeepFoolAttack
from foolbox.models import PyTorchModel

In [19]:
model_fb = PyTorchModel(model, bounds=(0,1))