### Setup
* RobustBench: this library is used for loading the robust classifier. For more information visit: https://github.com/RobustBench/robustbench

* foolbox: this library is used for adversarial example generation. For more information visit: https://github.com/bethgelab/foolbox

In [None]:
from robustbench.utils import clean_accuracy
from robustbench.utils import load_model
import matplotlib.pyplot as plt
from torch import unique
import foolbox as fb
import numpy as np
import pickle
import torch
import os

### Download and preprocess the data:

* We will use 1000 test examples from the CIFAR-10 dataset. These images are new to the model, as it has not seen them during training. We want to fool the model on its predictions for new images!

In [None]:
import gdown

# Google Drive id of the dataset file and the local name it is saved under.
file_id = "1A5gQCE0bHZhBlfcLQ2fFP5UygpgVkdAX"
output_file = 'cifar10.pt'

# Build the direct-download URL once, then fetch the file. The call is kept as
# the last expression so the notebook still echoes its return value.
download_url = f"https://drive.google.com/uc?id={file_id}"
gdown.download(download_url, output_file)

Downloading...
From: https://drive.google.com/uc?id=1A5gQCE0bHZhBlfcLQ2fFP5UygpgVkdAX
To: /content/cifar10.pt
100%|██████████| 12.3M/12.3M [00:00<00:00, 36.0MB/s]


'cifar10.pt'

In [None]:
# The file was just downloaded from Google Drive, so treat it as untrusted:
# weights_only=True restricts unpickling to tensors and plain containers
# instead of executing arbitrary pickled code. The cells below only read the
# 'images' and 'labels' entries of the loaded object.
# NOTE(review): requires a torch version that supports `weights_only`;
# presumably this also removes the warning torch.load printed before - confirm.
cifar_data = torch.load('cifar10.pt', weights_only=True)

  cifar_data = torch.load('cifar10.pt')


In [None]:
# Pull the two entries out of the loaded dict.
images, labels = cifar_data['images'], cifar_data['labels']

# Dividing by 255 rescales the pixel values (the next cell prints the
# resulting min/max); the labels are used unchanged.
x_test = images / 255.0
y_test = labels

# Show how many examples each class label has.
print(unique(y_test, return_counts=True))

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), tensor([100, 100, 100, 100, 100, 100, 100, 100, 100, 100]))


In [None]:
# Sanity check: tensor shapes first, then the largest and smallest pixel value.
print(x_test.shape, y_test.shape)
print(x_test.max(), x_test.min())

torch.Size([1000, 3, 32, 32]) torch.Size([1000])
tensor(1.) tensor(0.)


### Loading the robust model

* IMPORTANT: You shouldn't change this part of the code, as your final generated examples will be evaluated on how successfully they fool this model!

In [None]:
# Load the RobustBench classifier that the adversarial examples are evaluated
# against: model 'Kireev2021Effectiveness_RLATAugMix' for the 'cifar10'
# dataset under the 'corruptions' threat model.
# Do not modify this call - the challenge scores submissions on this model.
model = load_model(model_name='Kireev2021Effectiveness_RLATAugMix', dataset='cifar10', threat_model='corruptions')

  checkpoint = torch.load(model_path, map_location=torch.device('cpu'))


### GPU Utilization

* For shorter running time, let's utilize GPU!

In [None]:
# Choose the compute device: CUDA when a GPU is visible, otherwise the CPU.
gpu_available = torch.cuda.is_available()
device = torch.device('cuda' if gpu_available else 'cpu')
if gpu_available:
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

# Move the model and the test tensors onto the chosen device.
model, x_test, y_test = (obj.to(device) for obj in (model, x_test, y_test))

Using GPU: Tesla T4


### Adversarial Example Generation -- Adversarial Perturbation

* Here, as a baseline, the challenge uses the PGD algorithm from the Foolbox library; the code below runs Foolbox's L2 Carlini & Wagner attack instead. This is the most important part of the challenge. Which algorithm will work best?

* There are many other adversarial example generation algorithms. Don't forget to check out other libraries!

  * One other very popular library among many others is Adversarial Robustness Toolbox (ART)!
  * There are many more algorithms out there; your task is to find the ones that work best based on our evaluation metrics.

In [None]:
# Wrap the PyTorch model for Foolbox; the bounds match the pixel range that
# was printed for x_test above.
pixel_bounds = (0, 1)
model_fb = fb.PyTorchModel(model, bounds=pixel_bounds)

In [None]:
import itertools
import foolbox as fb

# Carlini & Wagner hyperparameter candidates: one list of values per knob.
confidence_grid = [0]
cw_steps_grid = [1000]
binary_search_steps = [9]
stepsize = [0.1]
initial_const = [0.1]
epsilon_grid = [None]

# Cartesian product of all candidate lists. Each entry is one configuration,
# ordered as (confidence, steps, binary_search_steps, stepsize, initial_const,
# epsilon).
cw_grid = [
    *itertools.product(
        confidence_grid,
        cw_steps_grid,
        binary_search_steps,
        stepsize,
        initial_const,
        epsilon_grid,
    )
]

def run_cw(model_fb, x_test, y_test, binary_search_steps, confidence, steps, step_size, initial_const, epsilon):
    """Run Foolbox's L2 Carlini & Wagner attack on a batch of inputs.

    Args:
        model_fb: Foolbox-wrapped model to attack.
        x_test: Input images to perturb.
        y_test: Labels passed to the attack as its criterion.
        binary_search_steps: Forwarded to the attack constructor.
        confidence: Forwarded to the attack constructor.
        steps: Forwarded to the attack constructor.
        step_size: Forwarded to the attack constructor as ``stepsize``.
        initial_const: Forwarded to the attack constructor.
        epsilon: Forwarded to the attack call as ``epsilons``.

    Returns:
        Whatever the attack call returns; the caller in this notebook
        unpacks it as a 3-tuple ``(_, advs, success)``.
    """
    attack_options = dict(
        binary_search_steps=binary_search_steps,
        steps=steps,
        stepsize=step_size,
        confidence=confidence,
        initial_const=initial_const,
    )
    cw_attack = fb.attacks.L2CarliniWagnerAttack(**attack_options)
    return cw_attack(model_fb, x_test, y_test, epsilons=epsilon)

# Grid search over the Carlini & Wagner configurations in `cw_grid`.
# `score` is the robust accuracy (fraction of inputs the attack did NOT fool),
# so for the attacker a LOWER score means a STRONGER attack.
results = []
best_score = None
best_advs = None
best_success = None

for params in cw_grid:
    # `init_const` (not `initial_const`) so the grid list of that name is not
    # overwritten by a scalar while looping.
    confidence, cw_step, binary_search_step, step_size, init_const, epsilon = params
    print(f"Running Carlini & Wagner with params: confidence={confidence}, steps={cw_step}, binary_search_steps={binary_search_step}, stepsize={step_size}, initial_const={init_const}, epsilon={epsilon}")

    # Run the C&W attack with the current parameters
    _, advs, success = run_cw(model_fb, x_test, y_test, binary_search_step, confidence, cw_step, step_size, init_const, epsilon)

    # Robust accuracy under this configuration
    score = 1 - success.float().mean()

    # Store results
    results.append({
        'attack': 'CW',
        'confidence': confidence,
        'steps': cw_step,
        'binary_search_steps': binary_search_step,
        'stepsize': step_size,
        'initial_const': init_const,
        'score': score
    })
    print(f"Carlini & Wagner Score: {score}")

    # Remember the outputs of the strongest attack seen so far (first one wins
    # on ties, matching how min() below resolves ties).
    if best_score is None or score < best_score:
        best_score = score
        best_advs = advs
        best_success = success

# The best configuration is the one that leaves the LOWEST robust accuracy;
# taking the maximum here would select the weakest attack.
best_result = min(results, key=lambda x: x['score'])
print(f"Best result: {best_result}")

# Later cells read `advs` and `success`; point them at the best configuration
# rather than at whichever configuration happened to run last.
advs, success = best_advs, best_success


Running Carlini & Wagner with params: confidence=0, steps=1000, binary_search_steps=9, stepsize=0.1, initial_const=0.1, epsilon=None


### Let's compare the accuracies before and after perturbation!

In [None]:
# Robust accuracy: share of inputs on which the attack did not succeed.
robust_accuracy = 1 - success.float().mean()
print('Robust accuracy: {:.1%}'.format(robust_accuracy))

# Accuracy of the same model on the unperturbed test inputs.
clean_acc = clean_accuracy(model, x_test, y_test)
print(clean_acc)

Robust accuracy: 1.7%
0.941


### Let's explore how our perturbations look!

In [None]:
import torch
import matplotlib.pyplot as plt
import random

# `advs` is a single (N, C, H, W) tensor straight after the attack cell, but a
# one-element list once the save cell at the end of the notebook has wrapped
# it. Accept both, so `advs[0]` is never mistaken for the whole batch when it
# is really just the first image.
adv_images = advs[0] if isinstance(advs, (list, tuple)) else advs

# Follow the device the test data already lives on instead of hard-coding
# 'cuda', so this cell also works when the notebook fell back to the CPU.
adv_images = adv_images.to(x_test.device)
true_labels = y_test.to(x_test.device)

# Pass the perturbed images through the model to get the predicted labels
with torch.no_grad():  # No need to track gradients during inference
    logits_adv = model(adv_images)

# Get the predicted labels from the logits
predicted_labels_adv = torch.argmax(logits_adv, dim=1)

# Find which examples were misclassified (where predicted label != true label)
misclassified_indices = (predicted_labels_adv != true_labels).nonzero(as_tuple=True)[0]

# Get the misclassified original and perturbed images, true labels, and incorrect labels
misclassified_images = adv_images[misclassified_indices]
misclassified_original_images = x_test[misclassified_indices]
misclassified_predicted_labels = predicted_labels_adv[misclassified_indices]
misclassified_true_labels = true_labels[misclassified_indices]

# Choose a random subset of misclassified images to display
num_images_to_show = min(10, len(misclassified_images))  # Limit to 10 images for display
random_indices = random.sample(range(len(misclassified_images)), num_images_to_show)

# Class names (assuming CIFAR-10)
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

if num_images_to_show == 0:
    # Nothing was misclassified, so there is nothing to plot.
    print("No misclassified examples to display.")
else:
    # Plot the original (top row) and perturbed (bottom row) images in matching columns
    plt.figure(figsize=(25, 5))
    for i, idx in enumerate(random_indices):
        # Original image
        original_image = misclassified_original_images[idx]
        true_label = misclassified_true_labels[idx].item()

        # Perturbed image
        perturbed_image = misclassified_images[idx]
        incorrect_label = misclassified_predicted_labels[idx].item()

        # Convert images from tensor to numpy and transpose from (C, H, W) to (H, W, C)
        original_img = original_image.permute(1, 2, 0).cpu().numpy()
        perturbed_img = perturbed_image.permute(1, 2, 0).cpu().numpy()

        # Plot original image
        plt.subplot(2, num_images_to_show, i + 1)
        plt.imshow(original_img, interpolation='none')
        plt.title(f"Original: {class_names[true_label]}")
        plt.axis('off')

        # Plot perturbed (misclassified) image
        plt.subplot(2, num_images_to_show, num_images_to_show + i + 1)
        plt.imshow(perturbed_img, interpolation='none')
        plt.title(f"Perturbed: {class_names[incorrect_label]}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()


### Finally!

* Let's save our perturbed samples in a folder called 'challenge' and submit them for the evaluation.

In [None]:
# The saved object is a one-element list holding the adversarial batch. Wrap
# only when `advs` is not already a list, so re-running this cell does not
# nest it further and break `advs[0].shape` and the saved layout.
if not isinstance(advs, list):
    advs = [advs]
print(advs[0].shape)

# Create the 'challenge' directory if it doesn't exist
os.makedirs('challenge', exist_ok=True)

# Path to save the adversarial examples
file_path = os.path.join('challenge', 'advs.pkl')

# Save the 'advs' object
# NOTE(review): the tensor is pickled on whatever device it currently lives
# on - confirm the evaluator can load it (e.g. CUDA tensors need CUDA).
with open(file_path, 'wb') as f:
    pickle.dump(advs, f)

torch.Size([1000, 3, 32, 32])
