### Setup

In [1]:
import torch
import numpy as np
import timm
import torchvision as tvision
import tensorflow as tf
import requests

from PIL import Image
from urllib.request import urlopen
from transformers import AutoImageProcessor, ResNetForImageClassification

from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import (CarliniL2Method, ProjectedGradientDescent, SaliencyMapMethod,
                                 ElasticNet, DeepFool, HopSkipJump)

from cleverhans.torch.attacks.projected_gradient_descent import projected_gradient_descent
from cleverhans.tf2.attacks.carlini_wagner_l2 import carlini_wagner_l2

class DoubleModelWrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model.to(dtype=torch.double).cuda()

    def forward(self, x):
        x = x.to(dtype=torch.double)
        outputs = self.model(x)
        logits = outputs.logits.to(dtype=torch.double)
        return logits
    
class DoubleModelTimmWrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model.to(dtype=torch.double)

    def forward(self, x):
        x = x.to(dtype=torch.double)
        outputs = self.model(x)
        return outputs.to(dtype=torch.double)


# Setting up models

mobile_model = timm.create_model('tf_mobilenetv3_large_minimal_100.in1k', pretrained=True)
mobile_model = mobile_model.cuda().eval()

mobile_data_config = timm.data.resolve_model_data_config(mobile_model)
mobile_transforms = timm.data.create_transform(**mobile_data_config, is_training=False)

vgg16_model = timm.create_model('vgg16.tv_in1k', pretrained=True)
vgg16_model = vgg16_model.cuda().eval()

vgg16_data_config = timm.data.resolve_model_data_config(vgg16_model)
vgg16_transforms = timm.data.create_transform(**vgg16_data_config, is_training=False)

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
hf_resnet = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
resnet_model = DoubleModelWrapper(hf_resnet)


# Downloading labels for ImageNet1k
url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
imagenet_labels = requests.get(url).json()

# prepare image
image = Image.open(urlopen("https://transforms.stlzoo.org/production/animals/red-kangaroo-02-01.jpg?w=1200&h=1200&auto=compress%2Cformat&fit=crop&dm=1654795233&s=5f137aa9a410a7ea3386c6972265111d"))

img_transforms = tvision.transforms.Compose([
    tvision.transforms.Resize((224, 224)),
    tvision.transforms.ToTensor()
])

clever_img_torch = img_transforms(image).unsqueeze(0)
clever_img_tf = tf.convert_to_tensor(image, dtype=tf.float32)
art_img_np = clever_img_torch.numpy()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.cuda.is_available())

True


## Directed Attacks

In [3]:
# helper function for getting probability, class id and class name
def convert2label(pred, is_clever=False):
    label_id, probability = 0, 0.
    
    if not is_clever:
        tensor_pred = torch.from_numpy(pred)
        probabilities = torch.nn.functional.softmax(tensor_pred, dim=1)
        label_id = int(torch.argmax(tensor_pred))
        probability = torch.max(probabilities)

    else:
        probability = torch.max(pred)
        label_id = int(torch.argmax(pred))
        
    return label_id, probability

In [4]:
# helper function for executing ART attacks
import time

def art_attack(attack, img, classifier, targeted=False, target=None):

    start = time.time()

    adv_img = attack.generate(x=img) if not targeted else attack.generate(x=img, y=target)

    normal_pred = classifier.predict(img)
    adv_pred = classifier.predict(adv_img)
    
    attack_time = time.time() - start

    return normal_pred, adv_pred, attack_time 


### Adversarial Robustness Toolbox

In [5]:
# prepare classifiers
mobile_classifier = PyTorchClassifier(
    model = mobile_model,
    loss = torch.nn.CrossEntropyLoss(),
    input_shape=(3,224, 224),
    nb_classes=len(imagenet_labels),
    optimizer=None,
    clip_values=(0,1)
)

vgg16_classifier = PyTorchClassifier(
    model = vgg16_model,
    loss = torch.nn.CrossEntropyLoss(),
    input_shape=(3,224,224),
    nb_classes=len(imagenet_labels),
    optimizer=None,
    clip_values=(0,1)
)

resnet_classifier = PyTorchClassifier(
    model = resnet_model,
    loss = torch.nn.CrossEntropyLoss(),
    input_shape=(3,224,224),
    nb_classes=len(imagenet_labels),
    optimizer=None,
    clip_values=(0,1)
)

#### Carlini Wagner L2

In [None]:
classifier_tab = [mobile_classifier, vgg16_classifier, resnet_classifier]
classifier_names = ["mobile_v3", "vgg16", "resnet50"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)

target = np.zeros((1, len(imagenet_labels)))
target[0, 123] = 1


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = CarliniL2Method(classifier=classifier, targeted=True, learning_rate=0.1)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier, targeted=True, target = target)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")
    

C&W L_2: 100%|██████████| 1/1 [00:14<00:00, 14.55s/it]
C&W L_2: 100%|██████████| 1/1 [00:14<00:00, 14.11s/it]
C&W L_2: 100%|██████████| 1/1 [00:13<00:00, 13.99s/it]
C&W L_2: 100%|██████████| 1/1 [00:13<00:00, 13.55s/it]
C&W L_2: 100%|██████████| 1/1 [00:13<00:00, 13.08s/it]



Model: mobile_v3, Correct class: wallaby, Adversarial class: spiny lobster

Average time: 13.896895599365234, Average normal probability: 0.2399435043334961, Average adversary probability: 0.011769927106797695




C&W L_2: 100%|██████████| 1/1 [03:16<00:00, 196.63s/it]
C&W L_2: 100%|██████████| 1/1 [03:19<00:00, 199.93s/it]
C&W L_2: 100%|██████████| 1/1 [03:22<00:00, 202.86s/it]
C&W L_2: 100%|██████████| 1/1 [03:25<00:00, 205.03s/it]
C&W L_2: 100%|██████████| 1/1 [03:25<00:00, 205.80s/it]



Model: vgg16, Correct class: dingo, Adversarial class: spiny lobster

Average time: 202.8521964073181, Average normal probability: 0.371273010969162, Average adversary probability: 0.06096094101667404




C&W L_2: 100%|██████████| 1/1 [01:43<00:00, 103.94s/it]
C&W L_2: 100%|██████████| 1/1 [01:45<00:00, 105.47s/it]
C&W L_2: 100%|██████████| 1/1 [01:43<00:00, 103.96s/it]
C&W L_2: 100%|██████████| 1/1 [01:41<00:00, 101.91s/it]
C&W L_2: 100%|██████████| 1/1 [01:42<00:00, 102.04s/it]



Model: resnet50, Correct class: wallaby, Adversarial class: spiny lobster

Average time: 103.77791666984558, Average normal probability: 0.9998335838317871, Average adversary probability: 0.455068439245224




#### PGD

In [8]:
classifier_tab = [mobile_classifier, vgg16_classifier, resnet_classifier]
classifier_names = ["mobile_v3", "vgg16", "resnet50"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)

target = np.zeros((1, len(imagenet_labels)))
target[0, 123] = 1


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = ProjectedGradientDescent(estimator=classifier, norm=np.inf, targeted=True)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier, targeted=True, target = target)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")

                                                            


Model: mobile_v3, Correct class: wallaby, Adversarial class: spiny lobster

Average time: 4.578286123275757, Average normal probability: 0.2399435043334961, Average adversary probability: 0.9999837875366211




                                                            


Model: vgg16, Correct class: dingo, Adversarial class: spiny lobster

Average time: 65.88371543884277, Average normal probability: 0.371273010969162, Average adversary probability: 0.9997336268424988




                                                            


Model: resnet50, Correct class: wallaby, Adversarial class: spiny lobster

Average time: 34.450180530548096, Average normal probability: 0.9998335838317871, Average adversary probability: 1.0




#### Jacob Saliency Map Attack (JSMA)

In [10]:
classifier_tab = [mobile_classifier, vgg16_classifier, resnet_classifier]
classifier_names = ["mobile_v3", "vgg16", "resnet50"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)

target = np.zeros((1, len(imagenet_labels)))
target[0, 123] = 1


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = SaliencyMapMethod(classifier=classifier, theta=0.3, gamma=0.05)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier, targeted=True, target = target)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")

KeyboardInterrupt: 

### Non Targeted Attacks

#### ART

In [10]:
classifier_tab = [mobile_classifier, vgg16_classifier, resnet_classifier]
classifier_names = ["mobile_v3", "vgg16", "resnet50"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = DeepFool(classifier=classifier)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")

DeepFool: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]
DeepFool: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
DeepFool: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
DeepFool: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
DeepFool: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]



Model: mobile_v3, Correct class: wallaby, Adversarial class: coyote

Average time: 0.9639941215515136, Average normal probability: 0.2399435043334961, Average adversary probability: 0.15309089422225952




DeepFool: 100%|██████████| 1/1 [00:10<00:00, 10.90s/it]
DeepFool: 100%|██████████| 1/1 [00:10<00:00, 10.96s/it]
DeepFool: 100%|██████████| 1/1 [00:10<00:00, 10.16s/it]
DeepFool: 100%|██████████| 1/1 [00:11<00:00, 11.11s/it]
DeepFool: 100%|██████████| 1/1 [00:10<00:00, 10.32s/it]



Model: vgg16, Correct class: dingo, Adversarial class: hare

Average time: 11.292233276367188, Average normal probability: 0.371273010969162, Average adversary probability: 0.926469624042511




DeepFool: 100%|██████████| 1/1 [00:15<00:00, 15.67s/it]
DeepFool: 100%|██████████| 1/1 [00:14<00:00, 14.07s/it]
DeepFool: 100%|██████████| 1/1 [00:14<00:00, 14.95s/it]
DeepFool: 100%|██████████| 1/1 [00:13<00:00, 13.59s/it]
DeepFool: 100%|██████████| 1/1 [00:13<00:00, 13.32s/it]



Model: resnet50, Correct class: wallaby, Adversarial class: kit fox

Average time: 14.658172750473023, Average normal probability: 0.9998335838317871, Average adversary probability: 0.5609124898910522




In [None]:
mobile_model_double = DoubleModelTimmWrapper(mobile_model)
vgg16_model_double = DoubleModelTimmWrapper(vgg16_model)

mobile_classifier_double = PyTorchClassifier(
    model = mobile_model_double,
    loss = torch.nn.CrossEntropyLoss(),
    input_shape=(3,224,224),
    nb_classes=len(imagenet_labels),
    optimizer=None,
    clip_values=(0,1)
)

vgg16_classifier_double = PyTorchClassifier(
    model = vgg16_model_double,
    loss = torch.nn.CrossEntropyLoss(),
    input_shape=(3,224,224),
    nb_classes=len(imagenet_labels),
    optimizer=None,
    clip_values=(0,1)
)

classifier_tab = [mobile_classifier_double, resnet_classifier, vgg16_classifier_double]
classifier_names = ["mobile_v3", "resnet50", "vgg16"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = HopSkipJump(classifier=classifier, norm=2, max_iter=10)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    # think of a way to list all guessed classes from adversary
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")

HopSkipJump: 100%|██████████| 1/1 [00:23<00:00, 23.43s/it]
HopSkipJump: 100%|██████████| 1/1 [00:23<00:00, 23.77s/it]
HopSkipJump: 100%|██████████| 1/1 [00:24<00:00, 24.24s/it]
HopSkipJump: 100%|██████████| 1/1 [00:24<00:00, 24.40s/it]
HopSkipJump: 100%|██████████| 1/1 [00:23<00:00, 23.61s/it]



Model: mobile_v3, Correct class: wallaby, Adversarial class: coyote

Average time: 23.987810611724854, Average normal probability: 0.23994162678718567, Average adversary probability: 0.18785407841205598




HopSkipJump: 100%|██████████| 1/1 [02:58<00:00, 178.93s/it]
HopSkipJump: 100%|██████████| 1/1 [03:05<00:00, 185.48s/it]
HopSkipJump: 100%|██████████| 1/1 [03:02<00:00, 182.93s/it]
HopSkipJump: 100%|██████████| 1/1 [03:05<00:00, 185.52s/it]
HopSkipJump: 100%|██████████| 1/1 [03:03<00:00, 183.26s/it]


ERROR: Not all ids in table are the same


In [6]:
classifier_tab = [mobile_classifier_double, vgg16_classifier_double, resnet_classifier]
classifier_names = ["mobile_v3", "vgg16", "resnet50"]
iter = 5
norm_probs = np.zeros(iter)
norm_ids = np.zeros(iter)
adv_probs = np.zeros(iter)
adv_ids = np.zeros(iter)
times = np.zeros(iter)


for classifier, name in zip(classifier_tab, classifier_names):
    
    attack = ElasticNet(classifier=classifier, max_iter=10)
    
    for i in range(iter):
        norm, adv, att_time = art_attack(attack, img=art_img_np, classifier=classifier)
        norm_id, norm_prob = convert2label(norm)
        adv_id, adv_prob = convert2label(adv)
        
        times[i] = att_time
        norm_ids[i], norm_probs[i] = norm_id, norm_prob
        adv_ids[i], adv_probs[i] = adv_id, adv_prob
    
    if (not all(id == norm_ids[0] for id in norm_ids)) or (not all(id == adv_ids[0] for id in adv_ids)):
        print("ERROR: Not all ids in table are the same")
        break
    
    print("===================================================================================================================================================\n")
    print(f"Model: {name}, Correct class: {imagenet_labels[int(norm_ids[0])]}, Adversarial class: {imagenet_labels[int(adv_ids[0])]}\n")
    print(f"Average time: {np.average(times)}, Average normal probability: {np.average(norm_probs)}, Average adversary probability: {np.average(adv_probs)}")
    print("\n")

EAD: 100%|██████████| 1/1 [00:39<00:00, 39.76s/it]
EAD: 100%|██████████| 1/1 [00:39<00:00, 39.21s/it]
EAD: 100%|██████████| 1/1 [00:37<00:00, 37.05s/it]
EAD: 100%|██████████| 1/1 [00:36<00:00, 36.19s/it]
EAD: 100%|██████████| 1/1 [00:38<00:00, 38.28s/it]



Model: mobile_v3, Correct class: wallaby, Adversarial class: coyote

Average time: 38.520414638519284, Average normal probability: 0.23994162678718567, Average adversary probability: 0.18650758266448975




EAD: 100%|██████████| 1/1 [04:08<00:00, 248.88s/it]
EAD: 100%|██████████| 1/1 [04:07<00:00, 247.10s/it]
EAD: 100%|██████████| 1/1 [04:07<00:00, 247.46s/it]
EAD: 100%|██████████| 1/1 [04:06<00:00, 246.01s/it]
EAD: 100%|██████████| 1/1 [04:03<00:00, 243.95s/it]



Model: vgg16, Correct class: dingo, Adversarial class: hare

Average time: 248.32443704605103, Average normal probability: 0.3712729513645172, Average adversary probability: 0.23584821820259094




EAD: 100%|██████████| 1/1 [01:07<00:00, 67.20s/it]
EAD: 100%|██████████| 1/1 [01:05<00:00, 65.46s/it]
EAD: 100%|██████████| 1/1 [01:05<00:00, 65.26s/it]
EAD: 100%|██████████| 1/1 [01:05<00:00, 65.07s/it]
EAD: 100%|██████████| 1/1 [01:05<00:00, 65.18s/it]



Model: resnet50, Correct class: wallaby, Adversarial class: kit fox

Average time: 66.11154022216797, Average normal probability: 0.9998335838317871, Average adversary probability: 0.4972759783267975


