# Adversarial Attacks

In [None]:
import torch
from helpers import load_model, read_image, show_image, classifier
import adv_attacks as adv
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Loading the model
We first load a pretrained model and then create a classifier object that wraps the output of the model.

In [None]:
# Load the network through the project helper, requesting it by name.
model = load_model(name='ResNet50')
# Wrap the model in the helper classifier; later cells call `clf(image, k=...)`
# to classify images and read `clf.categ` for the class names.
clf = classifier(model)

## Classify an image

We now read an image and classify it with the model.

In [None]:
# Read the example image; `I` is reused by every later cell as the clean input.
I = read_image('imgs/cat.jpg')
show_image(I)
# Classify the clean image. k=5 presumably asks for the five best predictions
# (confirm in helpers.classifier). As the last expression of the cell, the
# call's return value becomes the cell output.
clf(I, k=5)

## Perform an attack

We now perform an adversarial attack on the image in order to obtain an example that is misclassified.

In [None]:
# Configure the attack. No `targeted` flag is passed here, so the attack's
# default mode applies. epsilon and alpha are presumably the perturbation
# budget and the per-iteration step size (confirm in adv_attacks).
attack = adv.Linf_pgsd(attack_iters=10, epsilon=0.05, alpha=0.01,
                       init_mode='zeros', verbosity=1)

# Run the attack on the clean image to obtain the adversarial example.
xadv = attack(model, I)

# Show the adversarial image and classify it (k=3); the classifier call is the
# last expression so its result is the cell output.
show_image(xadv)
clf(xadv, k=3)

In [None]:
# Visualise the perturbation: clean image minus adversarial image, min-max
# rescaled into [0, 1] for display.
diff = I - xadv
diff -= diff.min()  # shift so the smallest entry becomes 0
peak = diff.max()
# Guard the rescale: if the perturbation is constant (e.g. the attack left the
# image unchanged) every entry is now 0, and dividing by the maximum would be
# 0/0 and fill the image with NaNs. In that case show the all-zero image as is.
show_image(diff / peak if peak > 0 else diff)

# Targeted Attacks

We now perform a targeted attack, which aims to make the model predict a specific class of our choosing. To pick the target, we first list the available classes together with their indices.

In [None]:
# Print every class known to the classifier as "<index>: <name>" so that a
# target index can be chosen for the targeted attack.
for j, c in enumerate(clf.categ):
    # f-string instead of str() + concatenation: same output for string
    # labels, and it does not raise if a label is not a str.
    print(f'{j}: {c}')

In [None]:
# Configure the same attack class in targeted mode, with a larger epsilon
# (0.1) and a 'uniform' initialisation mode.
attack = adv.Linf_pgsd(
    targeted=True,
    attack_iters=10,
    epsilon=0.1,
    alpha=0.01,
    init_mode='uniform',
    verbosity=1,
)

# y carries the target class index (4) as a one-element tensor.
xadv = attack(model, I, y=torch.tensor([4]))

# Show the adversarial image and classify it (k=3); the classifier call is the
# last expression so its result is the cell output.
show_image(xadv)
clf(xadv, k=3)

In [None]:
# Visualise the targeted perturbation: clean image minus adversarial image,
# min-max rescaled into [0, 1] for display.
diff = I - xadv
diff -= diff.min()  # shift so the smallest entry becomes 0
peak = diff.max()
# Guard the rescale: if the perturbation is constant (e.g. the attack left the
# image unchanged) every entry is now 0, and dividing by the maximum would be
# 0/0 and fill the image with NaNs. In that case show the all-zero image as is.
show_image(diff / peak if peak > 0 else diff)