# Transferability of Adversarial Examples
This is a small experiment relating to the trasferability of adversarial examples, here we go.

## Import Essential Packages

In [3]:
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.transforms as transforms
import numpy as np
import os
from google.colab import drive

As the imagenet pretrained model of tensorflow can't be trained in a shor time, I implement it with **pytorch** whose cifar-10 pretrained model can be downloaded from github.

In [4]:
drive.mount("/content/drive")
path = "/content/drive/My Drive/adv"

os.chdir(path)
os.listdir(path)

from cifar10_models import *

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Define Constants
Constants like batch size.

In [5]:
batch_size = 64
epochs = 100

## Import Dataset
Experiment on on CIFAR-10 implemented with pytorch.

In [10]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


## Define Models
Github has some pretrained cifar-10 model structures implemented with pytorch, so I directly use them. And I first try VGG-16, ResNet-50 and Inception v3.

In [None]:
# This setp has been done.
# !python cifar10_download.py

100% 2.46G/2.46G [01:24<00:00, 29.0MMiB/s]
Download successful. Unzipping file.
Unzip file successful!


In [16]:
del resnet50
del vgg16
del inception_v3
from cifar10_models import *

In [None]:
# VGG16
vgg16 = vgg16_bn(pretrained=True)
# ResNet50
resnet50 = resnet50(pretrained=True)
# Inception v3
inception_v3 = inception_v3(pretrained=True)

# Evaluation mode
vgg16.eval()
resnet50.eval()
inception_v3.eval()

# Move the models to GPU
device = torch.device("cuda")
vgg16.to(device)
resnet50.to(device)
inception_v3.to(device)

# Check GPU
print(torch.cuda.get_device_name(0))

## Train Models
As the models are pretrained, so the training section is omitted, or we can train for more epochs for better accuracy.

## Test Model
Test the accuracy of the models.

In [20]:
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

In [None]:
# evaluation mode
vgg16.eval()
resnet50.eval()
inception_v3.eval()

In [23]:
correct_vgg = correct_resnet = correct_inception = 0
total = 0
with torch.no_grad():
  for data in testloader:
    images, labels = data
    images = images.to(device)
    labels = labels.to(device)
    outputs_vgg = vgg16(images)
    outputs_resnet = resnet50(images)
    outputs_inception = inception_v3(images)
    _, predicted_vgg = torch.max(outputs_vgg.data, 1)
    _, predicted_resnet = torch.max(outputs_resnet.data, 1)
    _, predicted_inception = torch.max(outputs_inception.data, 1)
    total += labels.size(0)
    correct_vgg += (predicted_vgg == labels).sum().item()
    correct_resnet += (predicted_resnet == labels).sum().item()
    correct_inception += (predicted_inception == labels).sum().item()


print('Accuracy of the vgg16 on the 10000 test images: %d%%' % (100 * correct_vgg / total))
print('Accuracy of the resnet50 on the 10000 test images: %d%%' % (100 * correct_resnet / total))
print('Accuracy of the inception_v3 on the 10000 test images: %d%%' % (100 * correct_inception / total))

Accuracy of the vgg16 on the 10000 test images: 81%
Accuracy of the resnet50 on the 10000 test images: 81%
Accuracy of the inception_v3 on the 10000 test images: 85%


## Adversarial Examples
Here I am going to generate some adversarial examples of one specific model structure, then apply them to other models to check the transibility of adversarial examples.

In [12]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
  # Collect the element-wise sign of the data gradient
  sign_data_grad = data_grad.sign()
  # Create the perturbed image by adjusting each pixel of the input image
  perturbed_image = image + epsilon*sign_data_grad
  # Adding clipping to maintain [0,1] range
  # perturbed_image = torch.clamp(perturbed_image, 0, 1)
  # Return the perturbed image
  return perturbed_image


def test(target_model, other_model_1, other_model_2, device, test_loader, epsilon):

  # Accuracy counter
  target_correct = other_correct_1 =other_correct_2 = 0
  valid_examples = valid_examples_1 = valid_examples_2 = 0
  adv_examples = []

  # Loop over all examples in test set
  for data, target in test_loader:
    # Send the data and label to the device
    data, target = data.to(device), target.to(device)

    # Set requires_grad attribute of tensor. Important for Attack
    data.requires_grad = True

    # Forward pass the data through the model
    output = target_model(data)
    other_output_1 = other_model_1(data)
    other_output_2 = other_model_2(data)
    init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
    other_init_pred_1 = other_output_1.max(1, keepdim=True)[1]
    other_init_pred_2 = other_output_2.max(1, keepdim=True)[1]

    # Model 1
    # If the initial prediction is wrong, dont bother attacking, just move on
    if init_pred.item() != target.item():
      continue
    else:
      valid_examples += 1
      # Calculate the loss
      loss = F.nll_loss(output, target)
      # Zero all existing gradients
      target_model.zero_grad()
      # Calculate gradients of model in backward pass
      loss.backward()
      # Collect datagrad
      data_grad = data.grad.data
      # Call FGSM Attack
      perturbed_data = fgsm_attack(data, epsilon, data_grad)
      # Re-classify the perturbed image
      output = target_model(perturbed_data)

      # Check for success
      final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
      if final_pred.item() == target.item():
        target_correct += 1
        # Special case for saving 0 epsilon examples
        if (epsilon == 0) and (len(adv_examples) < 5):
          adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
          adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
        else:
          # Save some adv examples for visualization later
          if len(adv_examples) < 5:
            adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
            adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

    # Model 2
    if other_init_pred_1.item() != target.item():
      pass
    else:
      valid_examples_1 += 1
      # Re-classify the perturbed image
      other_output_1 = other_model_1(perturbed_data)

      # Check for success
      final_pred = other_output_1.max(1, keepdim=True)[1] # get the index of the max log-probability
      if final_pred.item() == target.item():
        other_correct_1 += 1



    # Model 3
    if other_init_pred_2.item() != target.item():
      pass
    else:
      valid_examples_2 += 1
      # Re-classify the perturbed image
      other_output_2 = other_model_2(perturbed_data)

      # Check for success
      final_pred = other_output_2.max(1, keepdim=True)[1] # get the index of the max log-probability
      if final_pred.item() == target.item():
        other_correct_2 += 1
   

  # Calculate final accuracy for this epsilon
  final_acc = []
  final_acc_1 = target_correct/float(valid_examples)
  final_acc_2 = other_correct_1/float(valid_examples_1)
  final_acc_3 = other_correct_2/float(valid_examples_2)
    
  final_acc.append(final_acc_1)
  final_acc.append(final_acc_2)
  final_acc.append(final_acc_3)

  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, target_correct, valid_examples, final_acc_1))
  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, other_correct_1, valid_examples_1, final_acc_2))
  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, other_correct_2, valid_examples_2, final_acc_3))

  # Return the accuracy and an adversarial example
  return final_acc, adv_examples

Here define a test loader to generate adversarial exapmles, whose batch size is 1, since the algorithm can calculate one loss for the adversarial examples once.

In [13]:
adv_test_loader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)

For different epsilons, the adversarial examples has different effect, here we choose a relatively large epsilons whose effect may be more manifest.

In [18]:
epsilons = [0.2]
accuracies_vgg_based = []
examples_vgg = []
# Test VGG16
for eps in epsilons:
  acc, ex = test(vgg16, resnet50, inception_v3, device, adv_test_loader, eps)
  accuracies_vgg_based.append(acc)
  examples_vgg.append(ex)

Epsilon: 0.2	Test Accuracy = 1374 / 8139 = 0.16881680796166607
Epsilon: 0.2	Test Accuracy = 1711 / 7448 = 0.22972610096670246
Epsilon: 0.2	Test Accuracy = 1441 / 7674 = 0.1877769090435236


In [24]:
accuracies_resnet_based = []
examples_resnet = []
# Test ResNet50
for eps in epsilons:
    acc, ex = test(resnet50, vgg16, inception_v3, device, adv_test_loader, eps)
    accuracies_resnet_based.append(acc)
    examples_resnet.append(ex)

Epsilon: 0.2	Test Accuracy = 1538 / 8133 = 0.18910611090618468
Epsilon: 0.2	Test Accuracy = 1465 / 7448 = 0.19669709989258863
Epsilon: 0.2	Test Accuracy = 1460 / 7661 = 0.19057564286646653


In [19]:
accuracies_inception_based = []
examples_inception = []
# Test Inception v3
for eps in epsilons:
    acc, ex = test(inception_v3, vgg16, resnet50, device, adv_test_loader, eps)
    accuracies_inception_based.append(acc)
    examples_inception.append(ex)

Epsilon: 0.2	Test Accuracy = 1793 / 8588 = 0.20877969259431764
Epsilon: 0.2	Test Accuracy = 1561 / 7674 = 0.20341412561897315
Epsilon: 0.2	Test Accuracy = 2000 / 7661 = 0.26106252447461165


## Result

| Data Type / Accuracy/ Model Structure | VGG-16 | ResNet-50 |  Inception v3 |
|                 :----:                | :----: |   :----:  | :----:|
| Normal Data                           |   81%  |    81%    | 85% |
| VGG-16 based Adversarial Data         |1374 / 8139 = 16.88%|1711 / 7448 = 22.97%|1441 / 7674 = 18.78%|
| ResNet-50 based Adversarial Data      |1465 / 7448 = 19.67%|1538 / 8133 = 18.91%|1460 / 7661 = 19.06%|
| Inception v3 based AdAdversarial Data |1561 / 7674 = 20.34%|2000 / 7661 = 26.11%|1793 / 8588 = 20.88%|