# (0) Download Butterfly 

In [None]:
!kaggle datasets download -d gpiosenka/butterfly-images40-species

In [None]:
import zipfile
with zipfile.ZipFile('butterfly-images40-species.zip', 'r') as zip_ref:
    zip_ref.extractall('data/butterfly')

# (1) Process Butterfly Data

In [1]:
from utils.data import create_butterfly_dataset
trainset, testset, normal_mapping, reverse_mapping = create_butterfly_dataset()
assert len(trainset) == 12594, 'Size of train set not match'
assert len(testset) == 500, 'Size of test set not match'

# (2) Import Classifier

In [2]:
from model.butterfly_classifier import DenseNet121
import torch

model = DenseNet121(num_classes=len(normal_mapping)).to('cuda')
model.load_state_dict(torch.load('./model/states/butterfly_classifier.pth'))

<All keys matched successfully>

# (3) Evaluate Untargeted Adversarial Examples

In [3]:
from torch.utils.data import DataLoader
from utils.base import eval_accuracy


acc = eval_accuracy(model, testset,  batch_size=100)
print('Accuracy on test set is {}'.format(acc))

Accuracy on test set is 0.9640000462532043


In [None]:
from algo.attacker import adversarial_generator

test_img, test_label = testset[5]
test_img = test_img.unsqueeze(0)

adv_img = adversarial_generator(model = model, target_class=test_label,
                             image=test_img, 
                             search_var=1e-3,
                             sample_num=50,
                             bound=0.1,
                             lr=0.01,
                             query_limit=3000)

In [None]:
import torch.nn.functional as F

adv_logits = model(adv_img)
org_logits = model(test_img.to('cuda'))

print('Adversarial: predicted class is {}'.format(torch.argmax(adv_logits, dim=1)))
print('Original: predicted class is {}'.format(torch.argmax(org_logits, dim=1)))

print('Adversarial: logit of true class is {}'.format(adv_logits[0, test_label]))
print('Original: logit of true class is {}'.format(org_logits[0, test_label]))

print('Adversarial: probability of true class is {}'.format(F.softmax(adv_logits, dim=1)[0, test_label]))
print('Original: probability of true class is {}'.format(F.softmax(org_logits, dim=1)[0, test_label]))

In [4]:
from tqdm import tqdm 
import torch
import torch.nn.functional as F
from torch import nn
def NES(model, target_class, image, search_var, sample_num, g, u):
    #parameters
    n = sample_num #should be even
    N = image.size(2) #assume the image is N x N may subject to change
    
    #NES estimation
    g.zero_()
    with torch.no_grad():
        for i in range(n):
            u.normal_()
            g = g + F.softmax(model(image + search_var * u), dim =1)[0,target_class] * u
            g = g - F.softmax(model(image - search_var * u), dim =1)[0,target_class] * u #we assume the output of the model is ordered by class index
    return 1 / (2*n*search_var) * g

def PIA_adversarial_generator(model, initial_image, image, target_class, epsilon_adv, epsilon_0, search_var, sample_num, delta_epsilon, eta_max, eta_min, k=5):
    device = next(model.parameters()).device
    initial_image, image = initial_image.to(device), image.to(device)
    x_adv = image.clone()
    N = initial_image.size(2)
    g = torch.zeros(N, requires_grad=False).to(device)
    u = torch.randn((N, N)).to(device)
    
    epsilon = epsilon_0
    x_adv = torch.clamp(x_adv, initial_image - epsilon, initial_image + epsilon)
    new_class = target_class
    
    with torch.no_grad():
        time = 1
        while (epsilon > epsilon_adv) | (new_class != target_class):
            if time%20 == 0:
                print("20 times")
                print(F.softmax(model(x_adv), dim=1))
                print(torch.argmax(model(x_adv)))
                print(epsilon)
            time += 1
            gradient = NES(model, target_class, x_adv, search_var, sample_num, g, u)
            eta = eta_max
            x_adv_hat = x_adv - eta * gradient
            probabilities_adv = F.softmax(model(x_adv_hat), dim=1)
            top_probs, top_classes = torch.topk(probabilities_adv, k)
            while not target_class in top_classes[0]:
                if eta < eta_min:
                    epsilon += delta_epsilon
                    delta_epsilon /= 2
                    x_adv_hat = x_adv
                    break  
                eta /= 2
                x_adv_hat = torch.clamp(x_adv - eta * gradient, initial_image - epsilon, initial_image + epsilon)
            x_adv = x_adv_hat
            epsilon = epsilon - delta_epsilon
            new_class = torch.argmax(model(x_adv))
    return x_adv

In [5]:
test_img, test_label = testset[3]
test_img = test_img.unsqueeze(0)
initial_img, initial_label = testset[111]
initial_img = initial_img.unsqueeze(0)

In [6]:
print(initial_label)
print(test_label)

15
6


In [7]:
model.eval()
torch.argmax(model(initial_img.to('cuda')), dim=1)

tensor([15], device='cuda:0')

In [8]:
adv_img = PIA_adversarial_generator(model, initial_image = initial_img, 
                                    image = test_img, target_class = test_label,
                                    epsilon_adv = 0.05, epsilon_0 = 0.5,
                                    search_var = 1e-3, sample_num = 50,
                                    delta_epsilon = 1e-3, 
                                    eta_max = 0.01, eta_min = 0.005,
                                    k=5)

20 times
tensor([[2.0374e-06, 1.1924e-04, 4.2600e-07, 2.7792e-05, 1.4960e-05, 4.5288e-08,
         1.0205e-01, 1.0875e-03, 1.2743e-06, 2.4363e-03, 6.1711e-07, 4.9238e-07,
         5.8783e-08, 3.7343e-07, 2.1906e-05, 2.6281e-07, 7.9728e-05, 3.0662e-08,
         5.8256e-06, 4.9032e-07, 2.5062e-04, 2.2484e-06, 6.2691e-05, 1.8416e-04,
         6.3189e-05, 2.9964e-04, 9.2142e-07, 8.2076e-09, 1.1231e-06, 1.8921e-07,
         2.5577e-07, 8.0916e-04, 1.0462e-05, 1.7859e-04, 1.6533e-04, 9.5686e-07,
         5.4465e-06, 2.6835e-06, 2.4141e-04, 1.3511e-05, 8.3550e-05, 6.8761e-07,
         3.6833e-04, 5.4205e-04, 5.7056e-06, 9.5582e-05, 9.4168e-07, 9.3150e-03,
         1.0687e-05, 4.8317e-07, 3.3483e-04, 4.9779e-06, 8.4849e-07, 1.7952e-06,
         1.0313e-05, 8.9764e-06, 5.3263e-07, 8.6556e-08, 2.6397e-02, 1.6718e-08,
         1.4010e-05, 1.1547e-07, 6.9851e-07, 1.1322e-07, 3.9558e-05, 2.8570e-04,
         1.2209e-07, 1.0742e-06, 2.8430e-07, 6.4690e-04, 2.4013e-03, 8.6092e-05,
         4.6606e-07

KeyboardInterrupt: 