In [1]:
import argparse
import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD, FGSM
from advertorch.attacks import LinfSPSAAttack
from trainers import Trainer, FGSMTrainer
from robustbench.model_zoo.models import Carmon2019UnlabeledNet
from utils import adversarial_accuracy, fgsm_
import eagerpy as ep
from Nets import CIFAR_Wide_Res_Net, CIFAR_Res_Net
%load_ext autoreload
%autoreload 2
%aimport Nets, trainers

## Setup for loading the trained CIFAR-10 models

In [2]:
# setup: device, batch size, data pipeline and class names
device = torch.device("cuda")
batch_size = 8

# Only ToTensor is applied; the Normalize step has been removed from the pipeline.
transform = transforms.Compose([transforms.ToTensor()])

# Values that 0 and 1 map to under a (x - 0.5) / 0.5 normalization, i.e. -1.0 and 1.0.
normalized_min = (0 - 0.5) / 0.5
normalized_max = (1 - 0.5) / 0.5

# Training split (shuffled).
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=2
)

# Test split (fixed order).
test_dataset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=2
)

# Human-readable class names.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


## Load Regular CIFAR-10 Model

In [6]:
# Build the baseline CIFAR_Res_Net in eval mode, then restore its saved weights.
# map_location=device remaps the checkpoint tensors onto `device`, so loading
# also works when the file was saved from a different device.
model = CIFAR_Res_Net(device).eval()
model.load_state_dict(torch.load("models/cifar_res_net.model", map_location=device))

<All keys matched successfully>

## Load CIFAR-10 Model trained with large FGSM steps

In [7]:
# Build a second CIFAR_Res_Net in eval mode and restore the FGSM-trained weights.
# map_location=device remaps the checkpoint tensors onto `device`, so loading
# also works when the file was saved from a different device.
fgsm_model = CIFAR_Res_Net(device).eval()
fgsm_model.load_state_dict(torch.load("models/cifar_res_net_fgsm06.model", map_location=device))

<All keys matched successfully>

In [8]:
def get_accuracy(model, attack=None, epsilon=0.03, loader=None):
    """Return the fraction of evaluation samples that `model` gets right.

    Args:
        model: Network to evaluate. It is wrapped in a foolbox ``PyTorchModel``
            with input bounds ``(0, 1)``.
        attack: Optional foolbox attack. When ``None``, clean accuracy is
            measured. Otherwise every batch is attacked first and only the
            samples for which the attack reports no success count as correct.
        epsilon: Value forwarded to the attack as ``epsilons``; unused when
            ``attack`` is ``None``.
        loader: Optional iterable of ``(images, labels)`` batches that exposes
            a ``dataset`` attribute. Defaults to the module-level
            ``test_loader``.

    Returns:
        The accumulated number of correct samples divided by
        ``len(loader.dataset)``.
    """
    if loader is None:
        loader = test_loader
    fmodel = PyTorchModel(model, bounds=(0, 1))
    correct = 0
    for images, labels in loader:
        images = images.to(device)
        # Send labels to `device` as int64 instead of casting to a CUDA-only
        # tensor type, so the function follows whatever `device` is configured.
        labels = labels.to(device=device, dtype=torch.long)
        if attack is None:
            # foolbox's accuracy() is a per-batch mean; scale back to a count.
            correct += accuracy(fmodel, images, labels) * images.shape[0]
        else:
            _, _, success = attack(fmodel, images, labels, epsilons=epsilon)
            # A sample is still "correct" only if the attack failed on it.
            correct += (~success).sum().item()
    return correct / len(loader.dataset)

In [9]:
# Clean (unattacked) accuracy of both models, printed as percentages.
# Two-decimal formatting keeps binary floating-point noise out of the report.
print("Model accuracy: {:.2f}%, FGSM model accuracy: {:.2f}%".format(
    get_accuracy(model) * 100,
    get_accuracy(fgsm_model) * 100,
))

Model accuracy: 84.58%, FGSM model accuracy: 72.44%


In [10]:
# Accuracy of both models under a plain FGSM attack with budget `eps`.
eps = 0.03
attack = FGSM()
# Two-decimal formatting avoids float noise such as 4.569999999999999 in the
# printed percentages.
print("FGSM attack with eps={}. Model accuracy: {:.2f}%, FGSM model accuracy: {:.2f}%".format(
    eps,
    get_accuracy(model, attack=attack, epsilon=eps) * 100,
    get_accuracy(fgsm_model, attack=attack, epsilon=eps) * 100,
))

FGSM attack with eps=0.03. Model accuracy: 4.569999999999999%, FGSM model accuracy: 74.5%


In [11]:
# Accuracy of both models under FGSM with random_start=True and budget `eps`.
eps = 0.03
attack = FGSM(random_start=True)
# Two-decimal formatting avoids float noise such as 44.519999999999996 in the
# printed percentages.
print("FGSM attack with eps={} and random step. Model accuracy: {:.2f}%, FGSM model accuracy: {:.2f}%".format(
    eps,
    get_accuracy(model, attack=attack, epsilon=eps) * 100,
    get_accuracy(fgsm_model, attack=attack, epsilon=eps) * 100,
))

FGSM attack with eps=0.03 and random step. Model accuracy: 6.94%, FGSM model accuracy: 44.519999999999996%


## Test for gradient masking

In [12]:
def spsa_accuracy(model, eps=0.03, iters=1, nb_sample=128, n_images=100, seed=0):
    """Return `model`'s accuracy on a test subset attacked with LinfSPSAAttack.

    Args:
        model: Network to attack and evaluate; called directly on the
            adversarial batch to obtain predictions.
        eps: Passed to ``LinfSPSAAttack`` as its ``eps`` argument.
        iters: Passed to ``LinfSPSAAttack`` as ``nb_iter``.
        nb_sample: Passed to ``LinfSPSAAttack`` as ``nb_sample``.
        n_images: Number of distinct test images to evaluate on. Clamped to
            the size of ``test_dataset``.
        seed: Seed for choosing the subset. The fixed default means that
            repeated calls (e.g. for different models) score the same images;
            pass ``None`` for a fresh random subset on every call.

    Returns:
        Fraction of the selected images still classified correctly after the
        attack.

    Raises:
        ValueError: If no images would be evaluated.
    """
    n_images = min(n_images, len(test_dataset))
    if n_images <= 0:
        raise ValueError("n_images must be positive and test_dataset non-empty")

    attack = LinfSPSAAttack(model, eps, nb_iter=iters, nb_sample=nb_sample,
                            loss_fn=nn.CrossEntropyLoss(reduction='none'))

    # Draw indices without replacement from a local generator: no image is
    # counted twice, and NumPy's global random state is left untouched.
    rng = np.random.RandomState(seed)
    indices = rng.choice(len(test_dataset), size=n_images, replace=False).tolist()
    subset = torch.utils.data.Subset(test_dataset, indices)
    subset_loader = torch.utils.data.DataLoader(subset, batch_size=batch_size,
                                                shuffle=False, num_workers=2)

    correct = 0
    for images, labels in tqdm.tqdm(subset_loader):
        images = images.to(device)
        # Follow `device` rather than casting to a CUDA-only tensor type.
        labels = labels.to(device=device, dtype=torch.long)
        adv = attack.perturb(images, labels)
        # Only predictions are needed here, so skip autograd bookkeeping.
        with torch.no_grad():
            preds = model(adv).argmax(-1)
        correct += (preds == labels).sum().item()
    return correct / len(subset)

In [13]:
# SPSA accuracy of the FGSM-trained model (iters=10, nb_sample=300); the bare
# call makes the returned fraction the cell's displayed output.
spsa_accuracy(fgsm_model, iters=10, nb_sample=300)

100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [01:20<00:00,  6.16s/it]


0.06

In [14]:
# SPSA accuracy of the regular model with the same settings (iters=10,
# nb_sample=300); the bare call makes the returned fraction the cell output.
spsa_accuracy(model, iters=10, nb_sample=300)

100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [01:20<00:00,  6.17s/it]


0.19