# Transferability of Adversarial Examples
This is a small experiment on the transferability of adversarial examples.

## Import Essential Packages

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import keras.layers as layers
import numpy as np

import os
from google.colab import drive

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import torchvision
import torchvision.transforms as transforms

As the ImageNet-pretrained TensorFlow models can't be fine-tuned in a short time, I implement the experiment with **pytorch**, whose CIFAR-10 pretrained models can be downloaded from GitHub.

In [None]:
# Mount Google Drive and make the project folder the working directory so the
# local `cifar10_models` package can be imported.
drive.mount("/content/drive")
path = "/content/drive/My Drive/adv"

os.chdir(path)
# NOTE(review): this is not the cell's last expression, so the listing is discarded.
os.listdir(path)

# presumably provides the vgg16_bn / resnet50 / inception_v3 factories called later — verify
from cifar10_models import *

Mounted at /content/drive


## Define Constants
Constants like batch size.

In [None]:
# Mini-batch size shared by the PyTorch data loaders and the Keras `fit` call.
batch_size = 64
# Number of training epochs passed to the Keras `fit` call.
epochs = 100

## Import Dataset
Experiment on CIFAR-10, loaded with tensorflow.

In [None]:
# Load the CIFAR-10 train/test splits through Keras (downloaded on first use).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Divide the pixel values by 255 so the images become floats.
x_train, x_test = x_train / 255.0, x_test / 255.0

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Keras returns the CIFAR-10 labels with shape (N, 1). Drop the trailing axis
# before one-hot encoding so the result is (N, 10) instead of (N, 1, 10), which
# would not line up with the 10-way model outputs.
y_train = tf.one_hot(indices=tf.squeeze(y_train, axis=-1), depth=10)
y_test = tf.one_hot(indices=tf.squeeze(y_test, axis=-1), depth=10)

In [None]:
# Sanity-check the shape of the encoded training labels.
print(y_train.shape)

As tensorflow didn't work quite well, another trial has been done in **pytorch**.

In [None]:
# Convert images to tensors, then normalize every RGB channel with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Training split: stored under ./data (downloaded if missing) and served in shuffled batches.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split: same transform, served in dataset order (shuffle=False).
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


## Define Models
Keras has included some model structures, so I directly use them as the first try. And I will try VGG-16, ResNet-50 and Inception v3.

In [None]:
# Build three CIFAR-10 classifiers on top of ImageNet-pretrained Keras backbones.
# Every backbone is created without its ImageNet head (include_top=False) and
# gets a fresh 10-way softmax head.
#
# `pooling` is None on purpose: the heads below (Flatten / GlobalAveragePooling2D)
# expect the un-pooled 4-D feature map. The previous value was the Python builtin
# `max`, which is not the string 'max' that Keras documents for this argument.
# `classifier_activation` is dropped because Keras ignores it unless
# include_top=True.

# VGG16
vgg16_base = tf.keras.applications.VGG16(
    include_top=False, weights='imagenet', input_tensor=None, input_shape=[32,32,3],
    pooling=None
)

x = vgg16_base.output
x = layers.Flatten()(x)
x = layers.Dense(4096,activation='relu')(x)
x = layers.Dense(4096,activation='relu')(x)
x = layers.Dense(10, activation='softmax')(x)

vgg16 = keras.Model(inputs=vgg16_base.input, outputs=x)

# ResNet50
resnet50_base = tf.keras.applications.ResNet50(
    include_top=False, weights='imagenet', input_tensor=None, input_shape=[32,32,3],
    pooling=None
)

x = resnet50_base.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(10, activation='softmax')(x)

resnet50 = keras.Model(inputs=resnet50_base.input, outputs=x)

# Inception v3
# The Keras documentation requires InceptionV3 inputs of at least 75x75, so the
# 32x32 CIFAR-10 images are upsampled 3x (to 96x96) before entering the backbone.
inceptionv3_base = tf.keras.applications.InceptionV3(
    include_top=False, weights='imagenet', input_tensor=None, input_shape=[96,96,3],
    pooling=None
)

inceptionv3_input = keras.Input(shape=[32,32,3])
x = layers.UpSampling2D(size=(3, 3))(inceptionv3_input)
x = inceptionv3_base(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(10, activation='softmax')(x)

inceptionv3 = keras.Model(inputs=inceptionv3_input, outputs=x)


In [None]:
# Print the layer-by-layer summary of the VGG16 classifier.
vgg16.summary()

In [None]:
# Print the layer-by-layer summary of the ResNet50 classifier.
resnet50.summary()

In [None]:
# NOTE(review): this summarizes the backbone (`inceptionv3_base`), not the full
# `inceptionv3` classifier — confirm that is intended.
inceptionv3_base.summary()

Implement with **pytorch** using pretrained models.

In [None]:
# Run the project's download script in a shell; presumably it fetches and unzips
# the pretrained CIFAR-10 weights — verify against cifar10_download.py.
!python cifar10_download.py

100% 2.46G/2.46G [01:24<00:00, 29.0MMiB/s]
Download successful. Unzipping file.
Unzip file successful!


In [None]:
# Drop the current bindings of the three model names (the Keras cell bound
# `vgg16` and `resnet50` to Keras models), then repeat the star import so the
# names exported by `cifar10_models` are bound again.
# presumably this restores the PyTorch factory functions — verify.
del vgg16, resnet50, inception_v3
from cifar10_models import *

In [None]:
# Instantiate the CIFAR-10 pretrained PyTorch models.
# The factories are looked up on the module rather than through the bare names.
# `resnet50 = resnet50(...)` rebinds the factory name to the model instance, so
# re-running the cell used to call a model object as if it were the factory.
import cifar10_models

vgg16 = cifar10_models.vgg16_bn(pretrained=True)

resnet50 = cifar10_models.resnet50(pretrained=True)

inception_v3 = cifar10_models.inception_v3(pretrained=True)

## Train Models
Train the models on standard data so that they learn to classify the images and, moreover, learn the features of the dataset.

In [None]:
def _new_adam():
    """Return a fresh AMSGrad Adam optimizer with the settings shared by all three models."""
    # NOTE(review): learning_rate=0.1 is 100x the Keras Adam default (0.001) —
    # confirm this is intended for fine-tuning pretrained backbones.
    return tf.keras.optimizers.Adam(
        learning_rate=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=True,
        name='Adam'
    )


# One independent optimizer instance per model.
optimazer_vgg = _new_adam()
optimazer_resnet = _new_adam()
optimazer_inception = _new_adam()

In [None]:
# Compile the three Keras classifiers.
# The heads end in a softmax layer and the labels are one-hot encoded, so the
# loss must be CategoricalCrossentropy on probabilities (from_logits=False).
# The previous SparseCategoricalCrossentropy(from_logits=True) expected integer
# class ids and raw logits, which matches neither the labels nor the outputs.
vgg16.compile(
    optimizer=optimazer_vgg,
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

resnet50.compile(
    optimizer=optimazer_resnet,
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

inceptionv3.compile(
    optimizer=optimazer_inception,
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [None]:
# Train VGG16 on the in-memory training arrays, holding out 10% of them for validation.
vgg16.fit(x=x_train, y=y_train, validation_split =0.1, verbose=1, batch_size=batch_size, epochs=epochs)

# The ResNet50 and Inception v3 training runs below are currently disabled.
# resnet50.fit(x=x_train, y=y_train, validation_split=0.1, verbose=1, batch_size=batch_size, epochs=epochs)

# inceptionv3.fit(x=x_train, y=y_train, validation_split=0.1, verbose=1, batch_size=batch_size, epochs=epochs)

## Test Model
Test the accuracy of the models.

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing on CPU-only runtimes.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16.to(device)
resnet50.to(device)
inception_v3.to(device)
# Switch every model to evaluation mode before measuring accuracy.
vgg16.eval()
resnet50.eval()
# The trailing semicolon keeps the notebook from printing the full module repr.
inception_v3.eval();

In [None]:
# Count the top-1 hits of each model over the whole test loader.
total = 0
correct_vgg = 0
correct_resnet = 0
correct_inception = 0

# No gradients are needed for plain evaluation.
with torch.no_grad():
  for images, labels in testloader:
    images, labels = images.to(device), labels.to(device)

    # Predicted class = index of the largest output along the class axis.
    predicted_vgg = vgg16(images).argmax(dim=1)
    predicted_resnet = resnet50(images).argmax(dim=1)
    predicted_inception = inception_v3(images).argmax(dim=1)

    total += labels.size(0)
    correct_vgg += (predicted_vgg == labels).sum().item()
    correct_resnet += (predicted_resnet == labels).sum().item()
    correct_inception += (predicted_inception == labels).sum().item()


print('Accuracy of the vgg16 on the 10000 test images: %d%%' % (100 * correct_vgg / total))
print('Accuracy of the resnet50 on the 10000 test images: %d%%' % (100 * correct_resnet / total))
print('Accuracy of the inception_v3 on the 10000 test images: %d%%' % (100 * correct_inception / total))

Accuracy of the vgg16 on the 10000 test images: 92%
Accuracy of the resnet50 on the 10000 test images: 90%
Accuracy of the inception_v3 on the 10000 test images: 90%


## Adversarial Examples
Here I am going to generate some adversarial examples for one specific model structure, then apply them to the other models to check the transferability of adversarial examples.

In [None]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad, clip_min=None, clip_max=None):
  """Apply one Fast Gradient Sign Method step to ``image``.

  Args:
    image: input tensor to perturb.
    epsilon: step size applied to every element.
    data_grad: gradient of the loss with respect to ``image``.
    clip_min: optional lower bound for the perturbed values.
    clip_max: optional upper bound for the perturbed values.

  Returns:
    ``image + epsilon * sign(data_grad)``, clamped to ``[clip_min, clip_max]``
    when at least one bound is given.
  """
  # Collect the element-wise sign of the data gradient
  sign_data_grad = data_grad.sign()
  # Create the perturbed image by adjusting each element of the input image
  perturbed_image = image + epsilon*sign_data_grad
  # Clipping is opt-in: the valid range depends on the input normalization
  # (this notebook normalizes with mean 0.5 / std 0.5, so [0, 1] would be wrong).
  if clip_min is not None or clip_max is not None:
    perturbed_image = torch.clamp(perturbed_image, min=clip_min, max=clip_max)
  return perturbed_image


def test(target_model, other_model_1, other_model_2, device, test_loader, epsilon):
  """Measure how FGSM examples crafted on one model transfer to two others.

  For every sample that ``target_model`` classifies correctly, an FGSM
  perturbation of size ``epsilon`` is built from ``target_model``'s gradient.
  The perturbed sample is then re-classified by all three models. Each other
  model is scored only on samples that it classified correctly before the
  attack (and that the target model also classified correctly).

  Args:
    target_model: model whose gradient is used to craft the perturbation.
    other_model_1: first model the perturbed samples are transferred to.
    other_model_2: second model the perturbed samples are transferred to.
    device: device the data and labels are moved to.
    test_loader: iterable of ``(data, target)`` pairs. The batch size must be 1
      because predictions and labels are read with ``.item()``.
    epsilon: FGSM step size forwarded to ``fgsm_attack``.

  Returns:
    ``(final_acc, adv_examples)``. ``final_acc`` lists the post-attack accuracy
    of the target model and the two other models, in that order (NaN when a
    model had no valid samples). ``adv_examples`` holds up to five tuples
    ``(initial prediction, final prediction, perturbed image as numpy array)``.
  """

  def _ratio(correct, valid):
    # Avoid ZeroDivisionError when no sample qualified (e.g. an empty loader).
    return correct/float(valid) if valid else float('nan')

  # Accuracy counters
  target_correct = other_correct_1 = other_correct_2 = 0
  valid_examples = valid_examples_1 = valid_examples_2 = 0
  adv_examples = []

  # Loop over all examples in the test set
  for data, target in test_loader:
    # Send the data and label to the device
    data, target = data.to(device), target.to(device)

    # The attack needs the gradient of the loss with respect to the input
    data.requires_grad = True

    # Clean predictions of all three models
    output = target_model(data)
    init_pred = output.max(1, keepdim=True)[1]
    # The other models are only queried for predictions, so no graph is needed
    with torch.no_grad():
      other_init_pred_1 = other_model_1(data).max(1, keepdim=True)[1]
      other_init_pred_2 = other_model_2(data).max(1, keepdim=True)[1]

    # If the target model is already wrong, there is nothing to attack
    if init_pred.item() != target.item():
      continue
    valid_examples += 1

    # cross_entropy is correct for raw logits and gives the same value as
    # nll_loss when the model already returns log-probabilities.
    loss = F.cross_entropy(output, target)
    # Zero all existing gradients
    target_model.zero_grad()
    # Calculate gradients of the loss in the backward pass
    loss.backward()
    data_grad = data.grad.detach()
    # Craft the adversarial example; detach it so re-classification builds no graph
    perturbed_data = fgsm_attack(data, epsilon, data_grad).detach()

    # Model 1 (the target model): re-classify the perturbed image
    with torch.no_grad():
      final_pred = target_model(perturbed_data).max(1, keepdim=True)[1]
    still_correct = final_pred.item() == target.item()
    if still_correct:
      target_correct += 1
    # Keep a few examples for visualization: successful attacks, or the
    # unperturbed images when epsilon is 0.
    if (not still_correct or epsilon == 0) and len(adv_examples) < 5:
      adv_ex = perturbed_data.squeeze().cpu().numpy()
      adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

    # Model 2: scored only if it was right on the clean image
    if other_init_pred_1.item() == target.item():
      valid_examples_1 += 1
      with torch.no_grad():
        other_final_pred_1 = other_model_1(perturbed_data).max(1, keepdim=True)[1]
      if other_final_pred_1.item() == target.item():
        other_correct_1 += 1

    # Model 3: scored only if it was right on the clean image
    if other_init_pred_2.item() == target.item():
      valid_examples_2 += 1
      with torch.no_grad():
        other_final_pred_2 = other_model_2(perturbed_data).max(1, keepdim=True)[1]
      if other_final_pred_2.item() == target.item():
        other_correct_2 += 1

  # Calculate final accuracy for this epsilon
  final_acc_1 = _ratio(target_correct, valid_examples)
  final_acc_2 = _ratio(other_correct_1, valid_examples_1)
  final_acc_3 = _ratio(other_correct_2, valid_examples_2)
  final_acc = [final_acc_1, final_acc_2, final_acc_3]

  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, target_correct, valid_examples, final_acc_1))
  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, other_correct_1, valid_examples_1, final_acc_2))
  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, other_correct_2, valid_examples_2, final_acc_3))

  # Return the accuracies and the saved adversarial examples
  return final_acc, adv_examples

In [None]:
# Batch size must stay 1: `test` reads predictions and labels with `.item()`.
# NOTE(review): shuffle=True with no fixed seed means the first few samples
# (the ones `test` may save as examples) can differ between runs.
adv_test_loader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=True)

In [None]:
# Craft adversarial examples on VGG16 and measure how they transfer to
# ResNet50 and Inception v3, once per epsilon.
epsilons = [0.25]
# Each run yields (per-model accuracies, saved adversarial examples).
runs = [test(vgg16, resnet50, inception_v3, device, adv_test_loader, eps) for eps in epsilons]
accuracies = [run_acc for run_acc, _ in runs]
examples = [run_examples for _, run_examples in runs]