In [None]:
!pip install foolbox==2.3.0

In [None]:
import foolbox
import numpy as np
import torch
import torchvision.models as models
from PIL import Image
import torchvision.transforms as transforms
from torch.autograd import Variable
from scipy.special import softmax
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import time 

In [None]:
def preprocess_orig_image(image):
    """Load an image file and convert it to a (3, 224, 224) tensor.

    The image is passed through ``Resize(256)``, ``CenterCrop(224)`` and
    ``ToTensor()``. No mean/std normalization is applied here.

    Args:
        image: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        The transformed image as a tensor of shape (3, 224, 224).
    """
    pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()])

    # The context manager releases the file handle that Image.open would otherwise keep open
    with Image.open(image) as im_orig:
        # Force three channels: grayscale, RGBA or palette images would
        # otherwise produce 1 or 4 channels and break the reshape below
        im = pipeline(im_orig.convert('RGB'))

    return im.reshape(3, 224, 224)

def test_model(model):
    """Wrap a PyTorch model in a foolbox ``PyTorchModel``.

    The model is switched to eval mode and wrapped with input bounds (0, 1),
    1000 output classes and a mean/std preprocessing applied along axis -3.

    Args:
        model: PyTorch model to wrap.

    Returns:
        The resulting ``foolbox.models.PyTorchModel``.
    """
    eval_net = model.eval()
    # presumably the standard ImageNet channel statistics; verify against the model's training setup
    channel_stats = {
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225],
        'axis': -3,
    }
    return foolbox.models.PyTorchModel(
        eval_net,
        bounds=(0, 1),
        num_classes=1000,
        preprocessing=channel_stats,
    )


def get_labels(label_orig, label_pert, labels_path='synset_words.txt'):
    """Look up the text labels for an original and a perturbed class index.

    Args:
        label_orig: Class index of the original image (anything ``int()`` accepts).
        label_pert: Class index of the perturbed image (anything ``int()`` accepts).
        labels_path: Text file with one label entry per line; line ``i`` is
            used for class index ``i``. Defaults to ``'synset_words.txt'``
            in the current working directory.

    Returns:
        Tuple ``(str_label_orig, str_label_pert)``, each being the text of the
        matching line up to its first comma.

    Raises:
        OSError: If ``labels_path`` cannot be opened.
        IndexError: If an index is outside the range of lines in the file.
    """
    # Context manager so the file handle is closed even if reading fails
    with open(labels_path, 'r') as label_file:
        labels = label_file.read().split('\n')

    # Built-in int() replaces np.int, which was only an alias and is gone from recent NumPy
    str_label_orig = labels[int(label_orig)].split(',')[0]
    str_label_pert = labels[int(label_pert)].split(',')[0]

    return str_label_orig, str_label_pert


def calculate_cossim(orig,pert):
    """Compute the cosine similarity between an original and an adversarial image.

    Both inputs are flattened, then their dot product is divided by the
    product of their Euclidean norms.

    Args:
        orig: Original image array.
        pert: Perturbed image array with the same number of elements.

    Returns:
        The cosine similarity of the two flattened arrays.
    """
    vec_a, vec_b = orig.flatten(), pert.flatten()
    magnitude = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.dot(vec_a, vec_b) / magnitude

def compute_ssim(image1,image2):
    """Compute the structural similarity score between two images.

    Each image is converted with ``rgb2gray`` and the pair is passed to
    ``ssim(..., full=True)``; only the first element of the result is returned.

    Args:
        image1: First image.
        image2: Second image.

    Returns:
        The score returned by ``ssim``.
    """
    # NOTE(review): rgb2gray and ssim are not imported in the notebook's import
    # cell; presumably they come from scikit-image, confirm and add the import.
    first_gray = rgb2gray(image1)
    second_gray = rgb2gray(image2)
    similarity, _ = ssim(first_gray, second_gray, full=True)
    return similarity

def mse(imageA, imageB):
    """Compute the mean squared error between two images.

    Both arrays are cast to float before subtracting, so unsigned integer
    inputs do not wrap around. The summed squared error is divided by the
    total number of elements, which gives a true mean for 2-D arrays as well
    as for arrays with a channel dimension in any position.

    Args:
        imageA: First image as a NumPy array.
        imageB: Second image as a NumPy array, same shape as ``imageA``.

    Returns:
        The mean of the element-wise squared differences.
    """
    squared_error = (imageA.astype("float") - imageB.astype("float")) ** 2
    # Normalize by every element, not just shape[0] * shape[1], so 3-D inputs are averaged correctly
    return np.sum(squared_error) / float(squared_error.size)

- Load the desired model and convert it to a Foolbox-style model
- Using the Foolbox utilities, load 20 sample images from ImageNet
- Create the Carlini-Wagner (CW) L2 attack object
- Run the attack on the model, measuring its execution time

In [None]:
# Set up the model: pretrained VGG16 wrapped as a foolbox model via test_model()
model = models.vgg16(pretrained=True) 
fmodel = test_model(model)

# Load 20 sample ImageNet images (channels-first, values in the (0, 1) bounds)
images, labels = foolbox.utils.samples(dataset='imagenet',batchsize=20, data_format='channels_first',bounds=(0,1))
# Print the fraction of samples whose argmax prediction matches the label
print(np.mean(fmodel.forward(images).argmax(axis=-1) == labels))

# Run the Carlini-Wagner L2 attack and time it.
# The timed span covers both constructing the attack object and running it.
start_time = time.time()
attack = foolbox.attacks.CarliniWagnerL2Attack(fmodel, distance=foolbox.distances.MeanSquaredDistance)
# unpack=False keeps the result objects (with .unperturbed, .perturbed and .distance) used further below
adversarials = attack(images, labels, unpack=False)
# Despite its name, end_time holds the elapsed time in seconds
end_time = time.time() - start_time

print ("Time taken: %.2f"%end_time)


# Per-image metrics collected over the attack results
im_advs_confidence = []
im_orig_confidence = []
cos_sim = []

# Compute the confidence on originals and adversarials, and their cosine similarity
for adv in adversarials:
  # Confidence is the largest softmax probability of the model's output
  image_conf = np.amax(foolbox.utils.softmax(fmodel.forward_one(adv.unperturbed)))
  im_orig_confidence.append(image_conf)

  # A failed attack has no perturbed image (None in foolbox 2.x). Skip it so the
  # loop does not crash before the failure count below is reported.
  if adv.perturbed is None:
    continue

  advs_conf = np.amax(foolbox.utils.softmax(fmodel.forward_one(adv.perturbed)))
  im_advs_confidence.append(advs_conf)
  # Move channels last (C, H, W) -> (H, W, C) before comparing the two images
  orig = np.transpose(adv.unperturbed, (1, 2, 0))
  pert = np.transpose(adv.perturbed, (1, 2, 0))
  cos_sim.append(calculate_cossim(orig,pert))

# Adversarial confidence and cosine similarity are averaged over successful attacks only
print ("Average Cosine Sim: ",np.mean(np.asarray(cos_sim)))
print ("Original Conf %.2f%%: " % (np.mean(np.asarray(im_orig_confidence))*100))
print ("Adversarial %.2f%%: " % (np.mean(np.asarray(im_advs_confidence))*100))

# Distance summary: min, mean, max over all results (inf marks a failed attack)
distances = np.asarray([a.distance.value for a in adversarials])
print("{:.1e}, {:.1e}, {:.1e}".format(distances.min(), np.mean(distances), distances.max()))
print("{} of {} attacks failed".format(sum(adv.distance.value == np.inf for adv in adversarials), len(adversarials)))
# A distance of exactly 0 means the unperturbed input was already misclassified
print("{} of {} inputs misclassified without perturbation".format(sum(adv.distance.value == 0 for adv in adversarials), len(adversarials)))


0.9
Time taken: 1083.02
Average Cosine Sim:  0.9999989
Original Conf 81.04%: 
Adversarial 43.87%: 
0.0e+00, 4.7e-07, 3.3e-06
0 of 20 attacks failed
2 of 20 inputs misclassified without perturbation
