# Adversarial Examples by Fast Gradient Sign

In [None]:
from IPython.display import clear_output, display
import numpy as np
import matplotlib.pyplot as plt

import torch as t
from torch.nn import Sequential, Linear, ReLU, LeakyReLU
import torchvision

import os

import utils as u

In [None]:
%matplotlib inline

In [None]:
# Pick the compute device: CPU when CUDA is unavailable, the second GPU
# ('cuda:1') when more than one GPU is visible, otherwise the default GPU.
if not t.cuda.is_available():
    device = t.device('cpu')
elif t.cuda.device_count() > 1:
    device = t.device('cuda:1')
else:
    device = t.device('cuda')

In [None]:
#device=t.device('cpu')

## MNIST 

In [None]:
# MNIST is fetched into ./data/mnist when it is not already there.
# The DataLoader wrappers are only used below as handles to `.dataset`.
dl_train = t.utils.data.DataLoader(
    torchvision.datasets.MNIST(root='./data/mnist', train=True, download=True)
)

dl_test = t.utils.data.DataLoader(
    torchvision.datasets.MNIST(root='./data/mnist', train=False, download=True)
)

In [None]:
mnist_train_data   = dl_train.dataset.data.to(dtype=t.float32).reshape(-1,28*28)/255.0
mnist_train_labels = dl_train.dataset.targets

In [None]:
mnist_test_data   = dl_test.dataset.data.to(dtype=t.float32).reshape(-1,28*28)/255.0
mnist_test_labels = dl_test.dataset.targets

In [None]:
train_dataset = t.utils.data.TensorDataset(mnist_train_data, mnist_train_labels)
test_dataset = t.utils.data.TensorDataset(mnist_test_data, mnist_test_labels)

In [None]:
# Reshuffle the training set every epoch so mini-batches are not always
# presented in the same fixed order (combined_loader below does the same).
train_loader = t.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)

In [None]:
# Reuse a previously saved model when one exists; otherwise build a fresh one.
# NOTE(review): t.load unpickles a complete model object, so only load files
# you created yourself. Recent PyTorch releases may require
# weights_only=False for such full-model checkpoints — confirm for your version.
if os.path.isfile('mnist_home_model.pt'):
    model = t.load('mnist_home_model.pt')
    pretrained = True
else:
    # The helper module is imported as `u`; the bare name `utils` is not defined.
    model = u.make_model(32, drop=0.0)
    model.apply(u.init_layer)
    pretrained = False
    

In [None]:
# Move the model's parameters to the selected device.
# NOTE(review): the dataset tensors are never moved to `device` in this
# notebook — confirm the cells below still work when `device` is a GPU.
model.to(device)

In [None]:
# Accuracy on the test split for the model as loaded / freshly initialised
# (u.accuracy is a project helper; presumably returns the fraction correct).
u.accuracy(model, test_dataset[:][0], test_dataset[:][1])

In [None]:
# Loss shared by all training loops and by the gradient scan further below.
ce = t.nn.CrossEntropyLoss()

In [None]:
# Optimizer for `model`; reused later for adversarial training.
optimizer = t.optim.Adam(model.parameters(), lr=0.0002)

In [None]:
%%time
if not pretrained:
    err_train=[]
    err_valid=[]
    for epoch in range(50):    
        for datum in train_loader:
            optimizer.zero_grad()
            (features,target) = datum
            pred = model(features)
            loss = ce(pred, target)
            loss.backward()
            optimizer.step()

        with t.no_grad():
            vpred  = model(test_dataset[:][0])
            vloss  = ce(vpred,test_dataset[:][1])
            err_valid.append(vloss)
            pred  = model(train_dataset[:][0])
            loss  = ce(pred,train_dataset[:][1])
            err_train.append(loss)
        clear_output()
        print("epoch %d %f %f %f %f" % 
              (epoch, loss, vloss, 
                accuracy(model, train_dataset[:][0],  train_dataset[:][1]),
               accuracy(model, test_dataset[:][0],  test_dataset[:][1])
                                       )   )

    plt.plot(err_train,c='b')
    plt.plot(err_valid,c='g')

In [None]:
# Final loss on the full test and train sets. This is evaluation only, so
# autograd is switched off to avoid building a graph over the whole dataset.
with t.no_grad():
    vpred = model(test_dataset[:][0])
    vloss = ce(vpred, test_dataset[:][1])
    pred = model(train_dataset[:][0])
    loss = ce(pred, train_dataset[:][1])
print(loss.item(), vloss.item())

In [None]:
# Accuracy on the test split, then on the train split.
print(u.accuracy(model, test_dataset[:][0],  test_dataset[:][1]) )
print(u.accuracy(model, train_dataset[:][0], train_dataset[:][1]) )

In [None]:
# Save the model once; an existing checkpoint is never overwritten.
if not os.path.isfile('mnist_home_model.pt'):
    t.save(model,"mnist_home_model.pt")

In [None]:
# Project helper; presumably disables gradients on the model parameters so
# that only input gradients are computed below — confirm in utils.
u.model_detach(model)

In [None]:
# Take one training example as a (1, 784) batch together with its label.
idx = 455
X = train_dataset[idx:idx+1][0].clone()
L = train_dataset[idx:idx+1][1].clone()

In [None]:
# Show the digit and print its true label.
plt.imshow(X.data.cpu().numpy().reshape(28,28), cmap='Greys')
print(L.item())

In [None]:
# Cross-entropy loss of the model on this single example.
pred = model(X)
ce(pred,L)

In [None]:
u.prediction(model, X)

## Noise

In [None]:
# Gaussian noise with mean 0 and standard deviation 0.1, same shape as X.
eta = t.empty_like(X).normal_(0,0.1)

In [None]:
# Keep the noisy image inside the pixel range [0, 1].
X_noisy = t.clamp(X+eta,0,1)

In [None]:
plt.imshow(X_noisy.detach().numpy().reshape(28,28), cmap='Greys')

In [None]:
u.prediction(model, X_noisy)

In [None]:
%time
test_noisy = test_dataset[:][0]+t.empty_like(test_dataset[:][0]).normal_(0.0, 0.1)
test_noisy = test_noisy.to(device);

In [None]:
u.accuracy(model, test_noisy, test_dataset[:][1])

## Linearity

> "Explaining and Harnessing Adversarial Examples", Ian J. Goodfellow, Jonathon Shlens, Christian Szegedy [arXiv:1412.6572](https://arxiv.org/abs/1412.6572)

$$\newcommand{\b}[1]{\mathbf{#1}}$$
$$  J(\b{x}) = \mathbf{w}\cdot \mathbf{x} $$

$$J(\b{x}+\b\delta)= \mathbf{w}\cdot \mathbf{x} + \mathbf{w}\cdot \mathbf{\delta}$$

In [None]:
# Random weight vector of a 1000-dimensional linear "model" J(x) = w·x.
w = np.random.uniform(-0.25, 0.25,1000)

In [None]:
# Random input with components drawn uniformly from [0, 1).
x = np.random.uniform(0, 1,1000)

In [None]:
# Response of the linear model to the clean input.
w @ x

In [None]:
epsilon = 0.2

In [None]:
# Unstructured Gaussian perturbation with standard deviation epsilon.
noise = np.random.normal(0,epsilon, 1000)

In [None]:
# Change of the response caused by the random perturbation.
w @(x+noise) - w @x

In [None]:
# Distribution of w·noise over 5000 independent noise draws.
J_sample =  np.random.normal(0,epsilon, (5000,1000))@w
plt.hist(J_sample, bins=100, histtype='step');

$$\newcommand{\sign}{\operatorname{sign}}$$
$$\eta = \epsilon\sign \b{w}$$

$$\b{w}\cdot\b{\eta} = \epsilon\sum_{i=1}^N |w_i|  \sim \epsilon N $$

In [None]:
# Worst-case perturbation for a linear model: every component has magnitude
# epsilon and the sign of the matching weight.
adv_noise = epsilon*np.sign(w)

In [None]:
# Change of the response caused by the sign-aligned perturbation.
w @(x+adv_noise) - w @x

### But Neural Networks are highly non-linear? Right?

### Wrong! Neural Networks are designed to be quite linear. 

### Fast Gradient Sign

$$J(\b{X}+\b{\delta}) \approx J(\b{X})+\nabla_{\b{X}} J(\b{X})\cdot\b{\delta}$$

$$\newcommand{\grad}{\operatorname{grad}}$$
$$\nabla_{\b{X}} J(\b{X})=\grad_{\b{X}} J(\b{X})
\equiv\frac{\partial J(\b{X})}{\partial {X_i}},
\quad i=1,\ldots,N$$

$$\delta = \epsilon \sign \nabla_{\b{X}} J(\b{X})$$

In [None]:
# Select a single training example (as a (1, 784) batch) and its label.
idx = 899
X = train_dataset[idx:idx+1][0].clone()
L = train_dataset[idx:idx+1][1]
print(L.item())

In [None]:
# Show the digit with its true label drawn in the top-right corner.
plt.imshow(X.data.cpu().numpy().reshape(28,28), cmap='Greys')
plt.text(22,3,'%d' % (L,), fontsize=32);

In [None]:
def FGS(model, orig, label, eps, clip=None):
    """Return a Fast Gradient Sign adversarial example for one input.

    The input is shifted by ``eps`` along the sign of the gradient of the
    cross-entropy loss with respect to the input, i.e. in the direction that
    increases the loss for ``label``.

    Args:
        model: callable mapping a ``(1, n_features)`` tensor to class logits.
        orig: tensor holding a single example; it is reshaped to one row
            before being passed to ``model``. Must be a leaf tensor.
        label: tensor with exactly one element, the true class index.
        eps: size of the perturbation applied to every component.
        clip: optional ``(low, high)`` pair; when given, the result is
            clamped to that range (e.g. ``(0, 1)`` to stay a valid image).
            The default ``None`` returns the unclamped result.

    Returns:
        A new tensor with the shape of ``orig`` holding the perturbed input.

    Side effects:
        ``orig`` values are not modified, but ``orig.grad`` holds the loss
        gradient afterwards and ``orig.requires_grad`` is left ``False``.
    """
    orig.requires_grad_(True)
    try:
        # Gradients accumulate across backward calls, so clear stale values.
        if orig.grad is not None:
            orig.grad.zero_()
        # Same loss as nn.CrossEntropyLoss() with default settings, computed
        # here directly so the function does not rely on a notebook global.
        logits = model(orig.reshape(1, -1))
        loss = t.nn.functional.cross_entropy(logits, label.view(1))
        loss.backward()
        eta = eps * orig.grad.sign()
    finally:
        # Switch gradient tracking off again even if the backward pass failed.
        orig.requires_grad_(False)
    adv = orig + eta
    if clip is not None:
        low, high = clip
        adv = t.clamp(adv, low, high)
    return adv

In [None]:
%%time
# Adversarial version of X with perturbation size 0.05.
AdvX = FGS(model, X,L, 0.05)

In [None]:
# Show the perturbed digit; the overlaid number is still the true label.
plt.imshow(AdvX.data.cpu().numpy().reshape(28,28), cmap='Greys')
plt.text(22,3,'%d' % (L,), fontsize=32);

In [None]:
u.prediction(model, AdvX)

# Keep the clean and the adversarial image on disk.
# NOTE(review): the file names presumably encode the true and the predicted
# digit for idx = 899 — confirm if idx or the model changes.
t.save(X, 'real_5.pt')
t.save(AdvX,'fake_4.pt')

In [None]:
%time
test_adv = t.stack([FGS(model,x,l,0.05) for x,l in zip(test_dataset[:][0], test_dataset[:][1])],dim=0)
test_adv=test_adv.to(device);

In [None]:
u.accuracy(model, test_adv, test_dataset[:][1])

In [None]:
plt.imshow(test_adv[99].data.cpu().numpy().reshape(28,28), cmap='Greys')
u.prediction(model,test_adv[99:100])

In [None]:
# Perturbation sizes scanned in the cells below.
eps = np.linspace(-0.1, 0.1, 100)

# Gradient of the loss with respect to the input X.
X.requires_grad_(True)
# X.grad only exists after an earlier backward pass through X, so guard the
# reset instead of assuming a previous cell has already populated it.
if X.grad is not None:
    X.grad.zero_()
out = model(X)
loss = ce(out, L)
loss.backward()
grad_X = X.grad.detach().view(-1)


In [None]:
# Loss as a function of the step size along the gradient-sign direction.
ls = []
eta = grad_X.sign()
#eta = t.ones(28*28)
# Directional derivative of the loss along eta (the L1 norm of the gradient
# when eta is the gradient sign).
print(eta@grad_X)
for e in eps:
    out = model(X.detach()+e*eta)
    loss = ce(out,L)
    ls.append(loss.item())

In [None]:
plt.plot(eps, ls)

In [None]:
# Raw model outputs for every class along the same direction.
outs =[]
eta = grad_X.sign()
for e in eps:
    out = model(X.detach()+e*eta)
    outs.append(out)

# Stack the per-step outputs and drop the singleton batch dimension in place.
outs = t.stack(outs,0)
outs.squeeze_();

In [None]:

# One curve per class. The vertical lines mark the clean input (0) and the
# perturbation size used for the adversarial examples above (0.05).
# NOTE(review): outs.numpy() requires that outs does not track gradients —
# confirm the model parameters are detached at this point.
for i in range(10):
    plt.plot(eps,outs.numpy()[:,i], label='%d' % (i,))
plt.axvline(0)   
plt.axvline(0.05)
plt.legend()    

### But aren't those examples tailored to one specific network?

### Not really ...

## Generalisation

In [None]:
# Second, independently built network used to test whether adversarial
# examples crafted against `model` transfer to a different model.
if os.path.isfile('another_mnist.pt'):
    another_model = t.load('another_mnist.pt')
    another_pretrained = True
else:
    # Different arguments than the first model (64 and 0.2 instead of 32 and
    # drop=0.0); the second positional argument is presumably the dropout rate.
    another_model = u.make_model(64,0.2)

    another_model.apply(u.init_layer)
    another_pretrained = False

In [None]:
# Separate Adam optimizer for the second model, with non-default betas.
another_optimizer=t.optim.Adam(another_model.parameters(), lr=0.0002, betas=[0.5, 0.999])

In [None]:

# Train the second model for 20 epochs unless a checkpoint was loaded.
if not another_pretrained:
    err_train = []
    err_valid = []
    for epoch in range(20):
        # Training mode for the optimisation steps. This matters when the
        # model has dropout (make_model(64, 0.2) presumably enables it).
        another_model.train()
        for features, target in train_loader:
            another_optimizer.zero_grad()
            pred = another_model(features)
            loss = ce(pred, target)
            loss.backward()
            another_optimizer.step()

        # Evaluation mode so the reported losses are not perturbed by dropout.
        another_model.eval()
        with t.no_grad():
            vpred = another_model(test_dataset[:][0])
            vloss = ce(vpred, test_dataset[:][1])
            # Store plain floats so the history can be plotted directly.
            err_valid.append(vloss.item())
            pred = another_model(train_dataset[:][0])
            loss = ce(pred, train_dataset[:][1])
            err_train.append(loss.item())
        clear_output()
        print("epoch %d %f %f %f" % (epoch, loss, vloss,u.accuracy(another_model, test_dataset[:][0],  test_dataset[:][1])))

    plt.plot(err_train, c='b')
    plt.plot(err_valid, c='g')

# Leave the model in evaluation mode for the accuracy checks that follow,
# whether it was trained here or loaded from disk. The trailing semicolon
# suppresses the cell output.
another_model.eval();

In [None]:
# Save the second model once; an existing checkpoint is never overwritten.
if not os.path.isfile('another_mnist.pt'):
    t.save(another_model, 'another_mnist.pt')

In [None]:
# Accuracy of the second model on the clean test images.
u.accuracy(another_model, test_dataset[:][0], test_dataset[:][1])

In [None]:
# Accuracy of the second model on adversarial examples that were crafted
# against the first model (`test_adv`).
u.accuracy(another_model, test_adv, test_dataset[:][1])

## Adversarial training

In [None]:
%time
train_adv = t.stack([FGS(model,x,l,0.05) for x,l in zip(train_dataset[:][0], 
                                                                train_dataset[:][1])],dim=0)
train_adv = train_adv.to(device);

In [None]:
# Adversarial training images paired with the original (true) labels.
adv_dataset = t.utils.data.TensorDataset(train_adv,train_dataset[:][1])

In [None]:
# Clean and adversarial examples in a single dataset.
combined_dataset = t.utils.data.ConcatDataset((train_dataset, adv_dataset))

In [None]:
# Shuffled so that clean and adversarial examples are mixed within batches.
combined_loader = t.utils.data.DataLoader(combined_dataset, batch_size=128, shuffle=True)

In [None]:
# Project helper; presumably re-enables parameter gradients that
# u.model_detach switched off. NOTE(review): the name is spelled
# `model_atach` — confirm it matches the definition in utils.
u.model_atach(model)

In [None]:
# Continue training `model` for 20 epochs on the combined clean and
# adversarial data, recording the clean train/test loss after every epoch.
err_train = []
err_valid = []
for epoch in range(1, 21):
    for features, target in combined_loader:
        optimizer.zero_grad()
        pred = model(features)
        loss = ce(pred, target)
        loss.backward()
        optimizer.step()

    with t.no_grad():
        vpred = model(test_dataset[:][0])
        vloss = ce(vpred, test_dataset[:][1])
        # Store plain floats so the history can be plotted directly.
        err_valid.append(vloss.item())
        pred = model(train_dataset[:][0])
        loss = ce(pred, train_dataset[:][1])
        err_train.append(loss.item())
    clear_output()
    # Report the accuracy of the model being trained here (`model`), not of
    # the unrelated `another_model`.
    print("epoch %d %f %f %f" % (epoch, loss, vloss,u.accuracy(model, test_dataset[:][0],  test_dataset[:][1])))

In [None]:
# Training (blue) and validation (green) loss per epoch.
plt.plot(err_train,c='b')
plt.plot(err_valid,c='g')

In [None]:
# Accuracy on the clean test images after adversarial training.
u.accuracy(model, test_dataset[:][0], test_dataset[:][1])

In [None]:
# Accuracy on the adversarial test images that were crafted earlier, against
# the model as it was before adversarial training.
u.accuracy(model, test_adv, test_dataset[:][1])

In [None]:
%time
test_adv = t.stack([FGS(model,x,l,0.05) for x,l in zip(test_dataset[:][0], test_dataset[:][1])],dim=0)
test_adv  =test_adv.to(device);

In [None]:
u.accuracy(model, test_adv, test_dataset[:][1])

## Targeted Fast Gradient Sign (T-FGS)

> "Adversarial examples in the physical world", Alexey Kurakin, Ian Goodfellow, Samy Bengio [arXiv:1607.02533](https://arxiv.org/abs/1607.02533)

In [None]:
# Reload the checkpoint saved earlier, discarding the adversarially trained
# weights held in memory.
model = t.load('mnist_home_model.pt')
u.model_detach(model)

In [None]:
# Select a single training example (as a (1, 784) batch) and its label.
idx = 89
Y = train_dataset[idx:idx+1][0].clone()
L = train_dataset[idx:idx+1][1]
print(L.item())

In [None]:
plt.imshow(Y.numpy().reshape(28,28), cmap='Greys')

In [None]:
u.prediction(model, Y)

In [None]:
# Class that the targeted attack should make the model predict.
T = t.LongTensor([3])

$$ J(X, T) $$

$$ X-\epsilon \operatorname{sign} \nabla_X J(X, T) $$

In [None]:
def T_FGS(model,orig, label, target, eps):
    """Return a targeted Fast Gradient Sign adversarial example.

    The input is shifted by ``eps`` against the sign of the gradient of the
    cross-entropy loss computed for ``target``, i.e. in the direction that
    decreases the loss for the target class, and clamped to ``[0, 1]``.

    Args:
        model: callable mapping a ``(1, n_features)`` tensor to class logits.
        orig: tensor holding a single example; it is reshaped to one row
            before being passed to ``model``. Must be a leaf tensor.
        label: true class of the example. Not used in the computation; kept
            so the signature parallels the untargeted attack.
        target: tensor with exactly one element, the class to push towards.
        eps: size of the perturbation applied to every component.

    Returns:
        A new tensor with the shape of ``orig``, clamped to ``[0, 1]``.

    Side effects:
        ``orig`` values are not modified, but ``orig.grad`` holds the loss
        gradient afterwards and ``orig.requires_grad`` is left ``False``.
    """
    orig.requires_grad_(True)
    try:
        # Gradients accumulate across backward calls, so clear stale values.
        if orig.grad is not None:
            orig.grad.zero_()
        # Same loss as nn.CrossEntropyLoss() with default settings, computed
        # here directly so the function does not rely on a notebook global.
        logits = model(orig.reshape(1, -1))
        loss = t.nn.functional.cross_entropy(logits, target.view(1))
        loss.backward()
        eta = eps * orig.grad.sign()
    finally:
        # Switch gradient tracking off again even if the backward pass failed.
        orig.requires_grad_(False)
    # Step downhill on the target loss and stay inside the pixel range.
    return t.clamp(orig - eta, 0, 1)

In [None]:
# Single targeted step of size 0.15 towards class T.
Y_adv = T_FGS(model, Y,L,T,0.15)

In [None]:
plt.imshow(Y_adv.numpy().reshape(28,28), cmap='Greys')

In [None]:
u.prediction(model,Y_adv)

$$ X_0=X$$
$$ X_{i+1} = X_i-\alpha \operatorname{sign} \nabla_X J(X_i, T), \qquad \alpha = \epsilon / n_{\mathrm{iter}} $$

In [None]:
def T_FGS_I(model,orig, label, target, eps, n_iter=3):
    """Apply the targeted gradient-sign step ``n_iter`` times in a row.

    The budget ``eps`` is divided evenly, so each call to ``T_FGS`` uses a
    step of ``eps / n_iter`` and starts from the result of the previous one.
    Returns the example produced by the last step.
    """
    step = eps / n_iter
    current = orig
    for _ in range(n_iter):
        current = T_FGS(model, current, label, target, step)
    return current
        
    

In [None]:
# Iterative targeted attack with a total budget of 0.1 and the default
# number of steps (n_iter=3).
Y_adv = T_FGS_I(model, Y,L,T,0.1)

In [None]:
plt.imshow(Y_adv.numpy().reshape(28,28), cmap='Greys');

In [None]:
u.prediction(model,Y_adv)