## Quelle: https://pytorch.org/tutorials/beginner/fgsm_tutorial.html

In [2]:
import os
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from scipy.special import softmax
import matplotlib.pyplot as plt
device = "cuda" if torch.cuda.is_available() else "cpu"
from torchvision import datasets
from torch.optim import SGD
from timeit import default_timer as timer
from torch_snippets import *
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torchvision.datasets import ImageFolder
from PIL import Image

In [3]:
train_set = datasets.MNIST('data/', download=True, train=True)
train_images = train_set.data
train_targets = train_set.targets

In [4]:
test_set = datasets.MNIST('data/', download=True, train=False)
test_images = test_set.data
test_targets = test_set.targets

In [5]:
class MNISTDataset(Dataset):
    def __init__(self, x, y):
        x = x.float()/255
        x = x.view(-1, 28*28)
        self.x, self.y = x, y
    def __getitem__(self, ix):
        x, y = self.x[ix], self.y[ix]
        return x.to(device), y.to(device)
    def __len__(self):
        return len(self.x)

In [6]:
def get_data():
    train = MNISTDataset(train_images, train_targets)
    train_dl = DataLoader(train, batch_size=32, shuffle=True)
    test = MNISTDataset(test_images, test_targets)
    test_dl = DataLoader(test, batch_size=1, shuffle=True)
    return train_dl, test_dl

In [7]:
def get_model():
    model = nn.Sequential(
        nn.Linear(28 * 28, 40),
        nn.Sigmoid(),
        nn.Linear(40, 20),
        nn.Sigmoid(),
        nn.Linear(20, 10)
    ).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=1e-2)
    return model, loss_fn, optimizer

In [8]:
@torch.no_grad()
def accuracy(x, y, model):
    with torch.no_grad():
        prediction = model(x)
    max_values, argmaxes = prediction.max(-1)
    is_correct = argmaxes == y
    return is_correct.cpu().numpy().tolist()

In [9]:
@torch.no_grad()
def loss(x, y, model):
    prediction = model(x)
    loss = loss_fn(prediction, y)
    return loss.item()

FGSM attack code

In [10]:
def fgsm_attack(image, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon*sign_data_grad
    # Adding clipping to maintain [0,1] range
    perturbed_image = torch.clamp(perturbed_image, 0, 1)
    # Return the perturbed image
    return perturbed_image

In [10]:
def test( model, device, test_loader, epsilon ):
    # Accuracy counter
    correct = 0
    adv_examples = []

    # Loop over all examples in test set
    for data, target in test_loader:
        # Send the data and label to the device
        data, target = data.to(device), target.to(device)

        # Set requires_grad attribute of tensor. Important for Attack
        data.requires_grad = True

        # Forward pass the data through the model
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

        # If the initial prediction is wrong, dont bother attacking, just move on
        if init_pred.item() != target.item():
            continue

        # Calculate the loss
        loss = F.nll_loss(output, target)

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = data.grad.data

        # Call FGSM Attack
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        output = model(perturbed_data)

        # Check for success
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        if final_pred.item() == target.item():
            correct += 1
        else:
            # Save some adv examples for visualization later
            if len(adv_examples) < 50:
                print("Found another one!")
                advEx = perturbed_data.squeeze().detach().cpu().numpy()
                origEx = data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), origEx, advEx) )

    # Calculate final accuracy for this epsilon
    final_acc = correct/float(len(test_loader))
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

    # Return the accuracy and an adversarial example
    return adv_examples

In [11]:
train_dl, test_dl = get_data()

In [12]:
model, loss_fn, optimizer = get_model()

In [13]:
model = model.to(device)

## Quelle: https://codingvision.net/iterative-fgvm-targeted-adversarial-samples-traffic-sign-recognition

In [14]:
model.load_state_dict(torch.load("nnMnist_40h_20h.ph"))

transform = T.Compose([    
     T.ToTensor()
])

datasetRnd = ImageFolder('data/randMNIST', transform)

dataloaderRnd = torch.utils.data.DataLoader(datasetRnd, batch_size=1, shuffle=True)

cntNoClass = 0
cntByClass = [0]*10

for imageRnd, targetRnd in dataloaderRnd:
    imageRnd = imageRnd.to(device)
    #imageRnd [1,3,28,28]
    imageRnd = imageRnd[:,0,:,:]
    #imageRnd [1,28,28]
    imageRnd = imageRnd.view(-1, 28*28)
    #imageRnd [1,784]
 
    output = model(imageRnd)
    outputSM = softmax(output.cpu().detach().numpy())
    
    #print( f'img output: {output} outputSM: {outputSM}')
    
    if 0.5 < outputSM.max():      
        cntNoClass += 1
        
        targetClass = 7
        
        imageOrig = imageRnd.detach().clone()
        
        firstIterAttackSucceeded = -1
        
        adversarial_targeted_class = torch.tensor([targetClass]).to(device)
        adversarial_sample = imageRnd.requires_grad_() 
        adversarial_optimizer = torch.optim.Adam([adversarial_sample], lr=1e-3)
        
        for i in range(2000):
            adversarial_optimizer.zero_grad()
            prediction = model(adversarial_sample)
            loss = torch.nn.CrossEntropyLoss()(prediction, adversarial_targeted_class)
            
             # this is the predicted class number
            predicted_class = np.argmax(prediction.cpu().detach().numpy(), axis=1)

            if targetClass==predicted_class and -1==firstIterAttackSucceeded:
                imageSucc = adversarial_sample.detach().clone()
                firstIterAttackSucceeded=i
            
            # updates gradient and backpropagates errors to the input
            loss.backward()
            adversarial_optimizer.step()
            
            # ensuring that the image is valid
            adversarial_sample.data = torch.clamp(adversarial_sample.data, 0, 1)

            #if i % 500 == 0:
            #    plt.imshow(adversarial_sample.data.cpu().view(28,28), cmap='gray', vmin=0, vmax=1)
            #    plt.show()

            #    print('Predicted:', predicted_class[0])
            #    print('Loss:', loss)
        
        if targetClass==predicted_class:
            torchvision.utils.save_image( imageOrig.view(28,28), f"adv{cntNoClass}_to{targetClass}_start.png" )
            torchvision.utils.save_image( imageSucc.view(28,28), f"adv{cntNoClass}_to{targetClass}_succ{firstIterAttackSucceeded}.png" )
            torchvision.utils.save_image( adversarial_sample.view(28,28), f"adv{cntNoClass}_to{targetClass}_optim.png" )
            break
            
    else:
        currentClass = np.argmax(outputSM)
        cntByClass[currentClass] += 1
        
print( f'cntNoClass: {cntNoClass}  cntByClass: {cntByClass}' )
        
#print(datasetRnd.class_to_idx)
#print(datasetRnd.imgs)

cntNoClass: 1  cntByClass: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [15]:
eps = 0.01
ex = test(model, device, test_dl, eps)

Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Found another one!
Epsilon: 0.01	Test Accuracy = 9232 / 10000 = 0.923

In [16]:
for i in range(len(ex)):
      currentResult = ex[i]
      initPred = currentResult[0]
      finalPred = currentResult[1]
      origEx = currentResult[2]
      advEx = currentResult[3]
        
      imagePath = "./{}".format(initPred)
      if not os.path.exists(imagePath):
          os.mkdir(imagePath)
        
      imAdv = Image.fromarray(255 * advEx.reshape((28,28)))
      imAdv = imAdv.convert('RGB')
      imAdv.save( "{}/sample{}_eps{}_now{}__adv.png".format(imagePath, i, eps, finalPred) )
      diff = advEx - origEx
        
      imOrig = Image.fromarray(255 * origEx.reshape((28,28)))
      imOrig = imOrig.convert('RGB')
      imOrig.save( "{}/sample{}_eps{}_now{}__orig.png".format(imagePath, i, eps, finalPred) )