In [4]:
!pip install cleverhans

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import torch
import torchvision
from torchvision import datasets, transforms, models
import numpy as np
import matplotlib.pyplot as plt

In [34]:
# Load dataset and initialize dataloader
BATCH_SIZE = 8

transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

testset = datasets.CIFAR10(root="~/data",
                           download=True,
                           train=False,
                           transform=transform)

testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=BATCH_SIZE,
                                         shuffle=True,
                                         num_workers=2)

truncated_testset, _ = torch.utils.data.random_split(testset, [1000, 9000])

truncated_testloader = torch.utils.data.DataLoader(truncated_testset,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=True,
                                                   num_workers=2)

Files already downloaded and verified


In [35]:
# Load the trained resnet model
PATH = "/content/RESNET_CLASSIFIER_L.pth"
resnet = torch.load(PATH)

In [36]:
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [37]:
device = torch.device("cuda")

In [38]:
# Checking accuracy on clean images
correct, total = 0, 0
with torch.no_grad():
  for i, data in enumerate(testloader):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)

    outputs = resnet(inputs)

    _, predicted = torch.max(outputs.data, 1)

    total += labels.size(0)
    correct += (predicted==labels).sum().item()

  print(f"Accuracy on 1000 clean images: {correct/total*100}")

Accuracy on 1000 clean images: 84.21


In [39]:
torch.cuda.empty_cache()

In [40]:
from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
from cleverhans.torch.attacks.projected_gradient_descent import projected_gradient_descent
from cleverhans.torch.attacks.noise import noise
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent
from cleverhans.torch.attacks.carlini_wagner_l2 import carlini_wagner_l2

In [41]:
attacks = ["fast_gradient_method",
           "projected_gradient_descent",
           "noise",
           "sparse_l1_descent",
           "carlini_wagner_l2"]

In [42]:
def distance_calculation(testloader, model, attack_modes):
  epsilon = 0.1
  clip_min = 0.0
  clip_max = 1.0
  norm = np.inf
  eps_iter = 0.05
  nb_iter = 10
  grad_sparsity = 50
  n_classes = 10
  order = norm

  for attack in attack_modes:
    distances = []
    for i, data in enumerate(truncated_testloader):
      inputs, labels = data
      inputs, labels = inputs.to(device), labels.to(device)
      
      if attack=="fast_gradient_method":
        adversarial_inputs = fast_gradient_method(resnet,
                                                  inputs,
                                                  eps=epsilon,
                                                  norm=norm,
                                                  clip_min=clip_min,
                                                  clip_max=clip_max,
                                                  y=labels,
                                                  targeted=False)

      elif attack=="projected_gradient_method":
        adversarial_inputs = projected_gradient_descent(resnet,
                                                  inputs,
                                                  eps=epsilon,
                                                  eps_iter=eps_iter,
                                                  nb_iter=nb_iter,
                                                  norm=norm)

      elif attack=="noise":
        adversarial_inputs = noise(inputs,
                             eps=epsilon,
                             order=order)
        
      elif attack=="sparse_l1_descent":
        adversarial_inputs = sparse_l1_descent(resnet,
                                         inputs,
                                         eps=epsilon,
                                         eps_iter=0.05,
                                         nb_iter=20,
                                         targeted=False,
                                         rand_init=False,
                                         clip_grad=False,
                                         grad_sparsity=grad_sparsity,
                                         sanity_checks=True)
        
      elif attack=="carlini_wagner_l2":
        adversarial_inputs = carlini_wagner_l2(resnet,
                                         inputs,
                                         n_classes=n_classes,
                                         targeted=False,
                                         binary_search_steps=3,
                                         max_iterations=100,
                                         initial_const=0.01,
                                         lr=0.01,
                                         confidence=0.1)

      distance = torch.norm(inputs-adversarial_inputs, p=2)
      distances.append(distance)  

    distances = torch.FloatTensor(distances)    
    print(f"Distance on attacked 1000 adversarial examples using {attack}: {round(torch.mean(distances).item(), 2)}")

In [43]:
distance_calculation(truncated_testloader, resnet, attacks)

Distance on attacked 1000 adversarial examples using fast_gradient_method: 405.61
Distance on attacked 1000 adversarial examples using projected_gradient_descent: 693.08
Distance on attacked 1000 adversarial examples using noise: 63.36
Distance on attacked 1000 adversarial examples using sparse_l1_descent: 0.0
Distance on attacked 1000 adversarial examples using carlini_wagner_l2: 399.03
