Installation and Repository cloning

In [26]:
!pip install adversarial-robustness-toolbox --quiet
!pip install multiprocess --quiet
!pip install importlib --quiet
!git clone https://github.com/Georgsiedel/corruption-testing.git --quiet

Import Libraries

In [33]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import numba
numba.__version__

import importlib
import time
import matplotlib.pyplot as plt
import numpy as np

from art.estimators.classification import PyTorchClassifier
from PIL import Image, ImageDraw, ImageFont, ImageOps

Import Adversarial Attack Methods

In [34]:
from art.attacks.evasion import (FastGradientMethod,
                                 ProjectedGradientDescentPyTorch,
                                 AutoAttack,
                                 AutoProjectedGradientDescent,
                                 AutoConjugateGradient,
                                 CarliniLInfMethod,
                                 CarliniL2Method,
                                 NewtonFool,
                                 DeepFool,
                                 ElasticNet,
                                 FrameSaliencyAttack,
                                 HopSkipJump,
                                 BasicIterativeMethod)

Load and Prepare CIFAR-10 Dataset

In [35]:
def load_dataset(dataset_split: int):
  """Load a reproducible random subset of the CIFAR-10 test set as tensors.

  The test split is downloaded to ``./data`` if necessary, converted with
  ``transforms.ToTensor()``, and truncated to ``dataset_split`` samples using a
  fixed-seed (42) random split, so repeated calls select the same images.

  Args:
    dataset_split: Number of test images to keep; must be between 1 and the
      size of the test set (inclusive).

  Returns:
    A tuple ``(xtest, ytest)``: the selected images stacked along a new leading
    dimension, and a tensor with their integer class labels in the same order.

  Raises:
    ValueError: If ``dataset_split`` is outside the valid range.
  """
  # Load CIFAR-10 dataset using torchvision
  transform = transforms.Compose([transforms.ToTensor()])
  testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

  if not 0 < dataset_split <= len(testset):
    raise ValueError(f'dataset_split must be between 1 and {len(testset)}, got {dataset_split}')

  # Truncated testset for experiments and ablations
  truncated_testset, _ = torch.utils.data.random_split(testset,
                                                      [dataset_split, len(testset) - dataset_split],
                                                      generator=torch.Generator().manual_seed(42))

  # Extract data and labels in a single pass so each image is decoded and transformed only once
  images, labels = zip(*truncated_testset)
  xtest = torch.stack(images)
  ytest = torch.tensor(labels)

  return xtest, ytest

Load and Prepare WideResNet Model

In [36]:
%cd /kaggle/working/corruption-testing

import experiments.models.wideresnet as wideresnet

# Initialize WideResNet model with specified parameters
net = wideresnet.WideResNet_28_4(10, 1, 'CIFAR10', normalized=True, block=wideresnet.WideBasic, dropout_rate=0.0, activation_function='relu')
# net = wideresnet.WideResNet_28_4(10, 1, 'CIFAR10', normalized=True, block=wideresnet.WideBasic, dropout_rate=0.0, activation_function='silu')

# Enable DataParallel to utilize multiple GPUs
net = torch.nn.DataParallel(net)

# Specify the path to the pre-trained model weights
PATH = '/kaggle/input/standard/pytorch/standardv1.0/1/standard.pth'
# PATH = '/kaggle/input/wrn28-4/pytorch/corruption-robust/1/robust.pth'

# Load the state dictionary from the specified path
state_dict = torch.load(PATH)

# Load the model weights into the WideResNet model
net.load_state_dict(state_dict["model_state_dict"], strict=False)

# Evaluate
net.eval()

/kaggle/working/corruption-testing


DataParallel(
  (module): WideResNet(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (layer1): Sequential(
      (0): WideBasic(
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (dropout): Dropout(p=0.0, inplace=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (shortcut): Sequential(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
        )
      )
      (1): WideBasic(
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (dropout): Dropout(p=0.0, inplace=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True

# Initializations

Adversarial Attack Class

In [37]:
class AdversarialAttacks:
  """
  Unified factory for adversarial attacks from the Adversarial Robustness Toolbox (ART).

  An instance stores the classifier and the shared attack hyperparameters once;
  `generate_attack` then builds a configured ART attack object for a given attack name.

  Attributes:
    classifier: ART estimator the attacks are run against.
    epsilon: Maximum perturbation budget (passed as `eps` where the attack supports it).
    eps_iter: Step size (passed as `eps_step`; used as `epsilon` for DeepFool).
    norm: Norm of the perturbation, forwarded to attacks that accept a `norm` argument.
    max_iterations: Iteration budget, forwarded as `max_iter` to the attacks that use it.
  """

  def __init__(self, classifier, epsilon, eps_iter, norm, max_iterations):
    self.classifier = classifier
    self.epsilon = epsilon
    self.eps_iter = eps_iter
    self.norm = norm
    self.max_iterations = max_iterations

  def generate_attack(self, attack_type, **kwargs):
    """Build the ART attack object registered under `attack_type`.

    Args:
      attack_type: One of 'fast_gradient_method', 'projected_gradient_descent',
        'auto_attack', 'auto_projected_gradient_descent', 'auto_conjugate_gradient',
        'carlini_wagner_linf', 'carlini_wagner_l2', 'newton_fool', 'deep_fool',
        'elastic_net', 'frame_saliency', 'hop_skip_jump'.
      **kwargs: Extra keyword arguments forwarded to the attack constructor. For
        'frame_saliency' they go to `FrameSaliencyAttack`, not to the inner
        `BasicIterativeMethod`.

    Returns:
      The configured attack object.

    Raises:
      ValueError: If `attack_type` is not one of the supported names.
    """
    if attack_type=='fast_gradient_method':
      return FastGradientMethod(self.classifier,
                                eps=self.epsilon,
                                eps_step=self.eps_iter,
                                minimal=True,
                                norm=self.norm,
                                **kwargs)
    elif attack_type=='projected_gradient_descent':
      return ProjectedGradientDescentPyTorch(self.classifier,
                                             eps=self.epsilon,
                                             eps_step=self.eps_iter,
                                             max_iter=self.max_iterations,
                                             norm=self.norm,
                                             **kwargs)
    elif attack_type=='auto_attack':
      return AutoAttack(estimator=self.classifier,
                        eps=self.epsilon,
                        eps_step=self.eps_iter,
                        norm=self.norm,
                        **kwargs)
    elif attack_type=='auto_projected_gradient_descent':
      return AutoProjectedGradientDescent(estimator=self.classifier,
                                          eps=self.epsilon,
                                          eps_step=self.eps_iter,
                                          norm=self.norm,
                                          max_iter=self.max_iterations,
                                          **kwargs)
    elif attack_type=='auto_conjugate_gradient':
      return AutoConjugateGradient(estimator=self.classifier,
                                   eps=self.epsilon,
                                   eps_step=self.eps_iter,
                                   norm=self.norm,
                                   max_iter=self.max_iterations,
                                   **kwargs)
    # The Carlini-Wagner, ElasticNet and HopSkipJump branches use a fixed max_iter of 40
    # and do not read self.max_iterations.
    elif attack_type=='carlini_wagner_linf':
      return CarliniLInfMethod(self.classifier,
                               max_iter=40,
                               **kwargs)
    elif attack_type=='carlini_wagner_l2':
      return CarliniL2Method(self.classifier,
                             max_iter=40,
                             **kwargs)
    elif attack_type=='newton_fool':
      return NewtonFool(self.classifier,
                        max_iter=self.max_iterations,
                        **kwargs)
    elif attack_type=='deep_fool':
      return DeepFool(self.classifier,
                      max_iter=self.max_iterations,
                      epsilon=self.eps_iter,
                      **kwargs)
    elif attack_type=='elastic_net':
      return ElasticNet(self.classifier,
                        max_iter=40,
                        **kwargs)
    elif attack_type=='frame_saliency':
      # The saliency attack wraps an inner iterative attacker that carries the budget settings
      attacker = BasicIterativeMethod(self.classifier,
                                      eps=self.epsilon,
                                      eps_step=self.eps_iter,
                                      max_iter=self.max_iterations)
      return FrameSaliencyAttack(self.classifier,
                                 attacker,
                                 method='iterative_saliency',
                                 **kwargs)
    elif attack_type=='hop_skip_jump':
      return HopSkipJump(self.classifier,
                         norm=self.norm,
                         max_iter=40,
                         **kwargs)
    else:
      raise ValueError(f'Attack type "{attack_type}" not supported!')

Adversarial Attack with Early Stopping

In [38]:
def attack_with_early_stopping(classifier, x, y, epsilon, eps_step, maximum_iterations, norm, attack_type, attack_wrapper, verbose: bool = True):
  """Apply an attack step by step and stop as soon as the prediction differs from the label.

  In every iteration a fresh attack object is requested from `attack_wrapper`, applied
  to the current (already perturbed) input, and the classifier is queried on the result.
  The loop ends at the first iteration whose prediction is not equal to `y`, or after
  `maximum_iterations` iterations.

  Args:
    classifier: Object with a `predict(batch)` method returning per-class scores.
    x: Single input as a torch tensor without batch dimension.
    y: True label as a scalar torch tensor.
    epsilon: Unused here; the budget is configured inside `attack_wrapper`.
    eps_step: Unused here; the step size is configured inside `attack_wrapper`.
    maximum_iterations: Maximum number of attack applications.
    norm: Unused here; the norm is configured inside `attack_wrapper`.
    attack_type: Attack name passed to `attack_wrapper.generate_attack`.
    attack_wrapper: Object providing `generate_attack(attack_type)`.
    verbose: If True, print a message when the prediction leaves the true label.

  Returns:
    A tuple `(adv_inputs, total_time)`: the last generated input as a numpy batch of
    size one (the unmodified input if `maximum_iterations` is 0) and the summed time
    spent inside `attack.generate`, in seconds.
  """
  total_time = 0.

  x = x.unsqueeze(0).numpy()
  y = y.numpy()
  true_label = int(y)

  clean_predicted = int(np.argmax(classifier.predict(x), axis=1)[0])

  # Fall back to the clean input so a zero iteration budget still returns a defined result
  adv_inputs = x

  for i in range(maximum_iterations):

    attack = attack_wrapper.generate_attack(attack_type)

    # Only the attack generation itself is timed, not attack construction or prediction
    start_time = time.time()
    adv_inputs = attack.generate(x,
                                y=np.expand_dims(y, axis=0))
    elapsed_time = time.time() - start_time
    total_time += elapsed_time

    predicted = int(np.argmax(classifier.predict(adv_inputs), axis=1)[0])

    if predicted != true_label:
      if verbose:
        if clean_predicted != true_label:
          # The clean input was already misclassified, so this is not a genuine label flip
          print(f'\nIterations for successful attack on misclassified input: {i+1} with processing time: {elapsed_time}\n')
        else:
          print(f'\nIterations for successful attack: {i+1} with processing time: {elapsed_time:.3f} seconds\n')
      break

    # Continue the next step from the perturbed input
    x = adv_inputs.copy()
  return adv_inputs, total_time

Adversarial Distance calculation for adversarial attack methods

In [39]:
def adversarial_distance_calculation(classifier, x, y, epsilon, eps_iter, norm, max_iterations, attack_type, get_image: bool = False, verbose: bool = True):
  """Attack every sample in `x` and record perturbation distance, runtime and accuracy.

  Each sample is attacked with `attack_with_early_stopping`; the distance between the
  clean sample and the returned adversarial sample is measured in the given norm.

  Args:
    classifier: ART classifier under attack; must provide `predict`.
    x: Batch of clean inputs (torch tensor, first dimension indexes samples).
    y: True labels, indexable in the same order as `x`.
    epsilon: Perturbation budget handed to the attack wrapper.
    eps_iter: Step size handed to the attack wrapper.
    norm: Norm used both by the attack and for the distance measurement.
    max_iterations: Maximum number of single-step attack applications per sample.
    attack_type: Attack name understood by `AdversarialAttacks.generate_attack`.
    get_image: If True, save example images of the last processed sample (adversarial and original).
    verbose: If True, print per-sample results and the early-stopping messages.

  Returns:
    A tuple `(distance_list, runtime_list, adversarial_accuracy)` with one distance and
    one runtime (seconds) per sample, and the percentage of samples whose adversarial
    prediction still equals the true label (0.0 when `x` is empty).
  """
  distance_list, runtime_list = [], []

  # Each generated attack performs a single step; the stepping loop lives in attack_with_early_stopping
  attacks = AdversarialAttacks(classifier=classifier,
                              epsilon=epsilon,
                              eps_iter=eps_iter,
                              norm=norm,
                              max_iterations=1)

  correct_prediction = 0
  total_labels = len(x)

  # Iterate over the `x` argument (not a notebook global) and keep the loop variable distinct from it
  for i, x_clean in enumerate(x):
    x_adversarial, runtime = attack_with_early_stopping(classifier=classifier,
                                                        x=x_clean,
                                                        y=y[i],
                                                        epsilon=epsilon,
                                                        eps_step=eps_iter,
                                                        maximum_iterations=max_iterations,
                                                        norm=norm,
                                                        attack_type=attack_type,
                                                        attack_wrapper=attacks,
                                                        verbose=verbose)

    # Adversarial accuracy calculation
    output_adversarial = classifier.predict(x_adversarial)
    predicted_adversarial = int(np.argmax(output_adversarial, axis=1)[0])
    correct_prediction += (predicted_adversarial == int(y[i]))

    # x_clean is already a tensor; only the numpy adversarial batch needs converting
    perturbation = x_clean.unsqueeze(0) - torch.as_tensor(x_adversarial)
    distance = torch.norm(perturbation, p=float(norm))
    distance_list.append(distance.item())
    runtime_list.append(runtime)

    if verbose:
      print(f'Image {i}\t\tAdversarial_distance: {distance:.5f}\t\tRuntime: {runtime:5f} seconds')

  adversarial_accuracy = (correct_prediction / total_labels) * 100 if total_labels else 0.0

  # Example images are taken from the last processed sample, so they need at least one sample
  if get_image and total_labels:
    get_example_image(x_adversarial, predicted_adversarial, attack_type=attack_type)
    get_example_image(x_clean.unsqueeze(0).numpy(), y[i], attack_type='original')

  print(f'\nAdversarial accuracy: {adversarial_accuracy}%\n')

  return distance_list, runtime_list, adversarial_accuracy

Generate and Save Example Images

In [40]:
def get_example_image(x, y, attack_type: str, output_dir: str = '/kaggle/working'):
  """Save an input as an enlarged, labelled PNG for visual inspection.

  Args:
    x: Image as a numpy array with a leading batch dimension of size one and
      channels first (the code squeezes axis 0 and moves channels last).
      Pixel values are presumably on a [0, 1] scale; values outside that range are clipped.
    y: Class index (0-9) used to look up the label text drawn on the image.
    attack_type: Name used in the output file name `example_<attack_type>.png`.
    output_dir: Directory the image is written to.
  """
  label_map = {0: 'airplane',
               1: 'automobile',
               2: 'bird',
               3: 'cat',
               4: 'deer',
               5: 'dog',
               6: 'frog',
               7: 'horse',
               8: 'ship',
               9: 'truck'}

  img = x.squeeze(0).transpose(1, 2, 0)
  # Adversarial perturbations can push pixels outside [0, 1]; clip first so the
  # uint8 conversion cannot wrap around and produce speckle artefacts
  img = (img.clip(0.0, 1.0) * 255).astype('uint8')
  img = Image.fromarray(img, 'RGB')
  img = img.resize((224, 224))

  # White border, with the label text drawn in black at the top-left corner
  img = ImageOps.expand(img, border=10, fill=(255, 255, 255))
  draw = ImageDraw.Draw(img)
  title = f'Label: {label_map[int(y)]}'
  draw.text((0, 0), title, (0, 0, 0))

  img.save(f'{output_dir}/example_{attack_type}.png')

Model configuration and Hyperparameter specifications

In [41]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)

# Initialize PyTorchClassifier for ART.
# clip_values keeps generated adversarial examples inside the valid pixel range of the
# inputs (the dataset is loaded with transforms.ToTensor(), i.e. on a 0-1 scale), so the
# measured distances refer to valid images.
classifier = PyTorchClassifier(model=net,
                               loss=criterion,
                               optimizer=optimizer,
                               input_shape=(3, 32, 32),
                               nb_classes=10,
                               clip_values=(0.0, 1.0))

# Adversarial attack hyperparameters
norm = np.inf
max_iterations = 250
eps_iter = 0.0003
# Total budget reachable when every one of the max_iterations steps uses the full step size
epsilon = max_iterations * eps_iter

# Split for the dataset (for truncation purposes in experiments)
split = 2

Load CIFAR-10 Test Dataset

In [42]:
# Load the fixed-seed subset of `split` CIFAR-10 test images and their labels used by all attacks below
xtest, ytest = load_dataset(dataset_split=split)

Files already downloaded and verified


# Evaluation

In [46]:
# Attacks to evaluate; commented entries are supported by AdversarialAttacks but currently disabled
attack_types = [
                'fast_gradient_method',
                'projected_gradient_descent',
#                 'auto_projected_gradient_descent',
#                 'auto_conjugate_gradient',
                'newton_fool',
                'deep_fool',
#                 'elastic_net',
#                 'frame_saliency',
#                 'auto_attack',
#                 'carlini_wagner_linf',
#                 'carlini_wagner_l2',
#                 'hop_skip_jump'
                ]

# Per attack: list of distances, list of runtimes and the adversarial accuracy
results_dict = {}

for attack_type in attack_types:
  print(f'\t\t-------------------------- Processing Attack: {attack_type} --------------------------\n')
  # Call the function defined in this notebook (`adversarial_distance_calculation`); the former
  # name `linf_distance_calculation` is not defined anywhere and fails on a fresh kernel
  distances, runtimes, adversarial_accuracy = adversarial_distance_calculation(classifier=classifier,
                                                        x=xtest,
                                                        y=ytest,
                                                        epsilon=epsilon,
                                                        eps_iter=eps_iter,
                                                        norm=norm,
                                                        max_iterations=max_iterations,
                                                        attack_type=attack_type)
  results_dict[attack_type] = {"adversarial_distance": distances,
                               "runtime": runtimes,
                               "adversarial_accuracy": adversarial_accuracy}
  print(f'\n\t\t{attack_type}\t\t{sum(runtimes):.3f}\t\t\n')

		-------------------------- Processing Attack: fast_gradient_method --------------------------


Iterations for successful attack: 1 with processing time: 0.088 seconds

Image 0		Adversarial_distance: 0.00300		Runtime: 0.087875 seconds


  x_tensor = torch.tensor(x)



Iterations for successful attack: 1 with processing time: 0.742 seconds

Image 1		Adversarial_distance: 0.03660		Runtime: 0.741750 seconds

Adversarial accuracy: 0.0%


		fast_gradient_method		0.830		

		-------------------------- Processing Attack: projected_gradient_descent --------------------------



PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 9 with processing time: 0.039 seconds

Image 0		Adversarial_distance: 0.00270		Runtime: 0.368810 seconds


PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 58 with processing time: 0.050 seconds

Image 1		Adversarial_distance: 0.01740		Runtime: 2.526577 seconds

Adversarial accuracy: 0.0%


		projected_gradient_descent		2.895		

		-------------------------- Processing Attack: newton_fool --------------------------



NewtonFool:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 1 with processing time: 0.045 seconds

Image 0		Adversarial_distance: 0.04790		Runtime: 0.045466 seconds


NewtonFool:   0%|          | 0/1 [00:00<?, ?it/s]

NewtonFool:   0%|          | 0/1 [00:00<?, ?it/s]

NewtonFool:   0%|          | 0/1 [00:00<?, ?it/s]

NewtonFool:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 4 with processing time: 0.037 seconds

Image 1		Adversarial_distance: 0.08054		Runtime: 0.171069 seconds

Adversarial accuracy: 0.0%


		newton_fool		0.217		

		-------------------------- Processing Attack: deep_fool --------------------------



DeepFool:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 1 with processing time: 0.155 seconds

Image 0		Adversarial_distance: 0.02189		Runtime: 0.154649 seconds


DeepFool:   0%|          | 0/1 [00:00<?, ?it/s]

DeepFool:   0%|          | 0/1 [00:00<?, ?it/s]


Iterations for successful attack: 2 with processing time: 0.146 seconds

Image 1		Adversarial_distance: 0.09706		Runtime: 0.295937 seconds

Adversarial accuracy: 0.0%


		deep_fool		0.451		



In [47]:
# Summarise every evaluated attack: mean perturbation distance and accumulated runtime
for attack_type in attack_types:
  attack_results = results_dict[attack_type]
  mean_distance = np.mean(attack_results["adversarial_distance"])
  total_runtime = sum(attack_results["runtime"])
  print(f'\nMean adversarial distance for {attack_type}: {mean_distance:.5f} with total runtime: {total_runtime: .5f} seconds\n')


Mean adversarial distance for fast_gradient_method: 0.01980 with total runtime:  0.82962 seconds


Mean adversarial distance for projected_gradient_descent: 0.01005 with total runtime:  2.89539 seconds


Mean adversarial distance for newton_fool: 0.06422 with total runtime:  0.21653 seconds


Mean adversarial distance for deep_fool: 0.05947 with total runtime:  0.45059 seconds



Save Results to JSON File

In [48]:
import json

# The file name carries the norm used for the evaluation (e.g. "results_linf.json")
json_file_path = f'/kaggle/working/results_l{norm}.json'
with open(json_file_path, 'w') as results_file:
  # Write the per-attack distances, runtimes and accuracies as indented JSON
  json.dump(results_dict, results_file, indent=4)
print(f'Evaluation results are saved under "{json_file_path}".')

Evaluation results are saved under "/kaggle/working/results_linf.json".


In [49]:
# plt.figure(figsize=(15, 8))
# for attack_type in attack_types:
#   plt.scatter(list(range(len(xtest))), results_dict[attack_type]['adversarial_distance'], label=attack_type)
# plt.xlabel('Image ID ----->')
# plt.ylabel('Distance ----->')
# plt.title(f'L_{norm} Distance')
# plt.legend()
# plt.tight_layout()
# plt.show()

In [50]:
# plt.figure(figsize=(15, 8))
# for attack_type in attack_types:
#   plt.plot(list(range(len(xtest))), results_dict[attack_type]['runtime'], label=attack_type)
# plt.xlabel('Image ID ----->')
# plt.ylabel('Runtime [seconds] ----->')
# plt.title('Step Runtime')
# plt.legend()
# plt.tight_layout()
# plt.show()

In [51]:
# plt.figure(figsize=(15, 8))
# for attack_type in attack_types:
#   plt.bar(attack_type, results_dict[attack_type]['adversarial_accuracy'], label=attack_type)
# plt.xlabel('Attacks')
# plt.ylabel('Adversarial accuracy [%] ----->')
# plt.xticks(rotation=45)
# plt.legend()
# plt.title('Adversarial Accuracy')
# plt.tight_layout()
# plt.show()

In [52]:
# plt.figure(figsize=(15, 8))
# for attack_type in attack_types:
#   plt.bar(attack_type, sum(results_dict[attack_type]['runtime']), label=attack_type)
# plt.xlabel('Attacks')
# plt.xticks(rotation=45)
# plt.ylabel('Runtime [seconds]----->')
# plt.title('Total Runtime')
# plt.legend()
# plt.tight_layout()
# plt.show()