# PGD-10 Attack

## Installations:

In [1]:
!pip install torch torchvision
!pip install cleverhans
!pip install xmltodict

Collecting cleverhans
  Downloading cleverhans-4.0.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.3/92.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nose (from cleverhans)
  Downloading nose-1.3.7-py3-none-any.whl (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.7/154.7 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pycodestyle (from cleverhans)
  Downloading pycodestyle-2.11.1-py2.py3-none-any.whl (31 kB)
Collecting mnist (from cleverhans)
  Downloading mnist-0.2.2-py2.py3-none-any.whl (3.5 kB)
Installing collected packages: nose, pycodestyle, mnist, cleverhans
Successfully installed cleverhans-4.0.0 mnist-0.2.2 nose-1.3.7 pycodestyle-2.11.1
Collecting xmltodict
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.13.0


## Imports:

In [2]:
import itertools
import json
import os

import torch
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
import xmltodict
from PIL import Image

## Loading the dataset:

In [3]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10


# Preprocessing: convert to a tensor, then apply (x - 0.5) / 0.5 on each of the three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Only the first 5000 training images are used.
subset_indices = list(range(5000))

train_dataset = CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_subset = torch.utils.data.Subset(train_dataset, subset_indices)

train_loader = DataLoader(dataset=train_subset, batch_size=64, shuffle=True)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13167055.93it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [4]:
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

class VGG16(nn.Module):
    """VGG-style convolutional classifier.

    The feature extractor has four stages of 3x3 convolutions (64, 128, 256
    and 512 output channels), each conv followed by an in-place ReLU and each
    stage closed by a 2x2 max-pool. The result is adaptively average-pooled to
    7x7, flattened, and passed through a three-layer fully connected head with
    Dropout between the layers.

    Args:
        num_classes: number of outputs of the final linear layer.
    """

    # Output channels of each 3x3 convolution, in order; "M" marks a 2x2 max-pool.
    _LAYOUT = (64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M")

    def __init__(self, num_classes=10):
        super().__init__()

        # Layers are created in exactly the listed order so that the indices inside
        # `features` (and therefore the state-dict keys) stay stable.
        stack = []
        channels = 3
        for spec in self._LAYOUT:
            if spec == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stack.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
            stack.append(nn.ReLU(inplace=True))
            channels = spec
        self.features = nn.Sequential(*stack)

        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.classifier = nn.Sequential(
            nn.Linear(channels * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class scores for the batch `x`."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch axis and flatten everything else for the linear head.
        flattened = pooled.view(pooled.size(0), -1)
        return self.classifier(flattened)

model = VGG16()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train
num_epochs = 10
# Select training mode explicitly so Dropout is enabled during optimisation.
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    seen = 0
    # Unpack each batch directly: a loop variable called `data` would rebind the
    # `torch.utils.data as data` module alias created by the imports cell.
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # CrossEntropyLoss averages over the batch; weight by batch size for a per-sample mean.
        running_loss += loss.item() * labels.size(0)
        seen += labels.size(0)

    # Report progress once per epoch so a long CPU run is not silent.
    print(f"Epoch {epoch + 1}/{num_epochs} - mean loss: {running_loss / max(seen, 1):.4f}")

print("Training complete!")


KeyboardInterrupt: ignored

In [None]:

# Nothing above this cell defines `test_loader`, so on a fresh kernel the loop below
# would stop with a NameError. Build the held-out loader here from the CIFAR10 /
# DataLoader imports and the `transform` of the data-loading cell.
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

correct = 0
total = 0

# Evaluation mode disables Dropout; otherwise the accuracy is measured on a randomly thinned network.
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score per sample is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy * 100:.2f}%')

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    """Show an image tensor with matplotlib.

    Args:
        img: tensor with three axes whose first axis is moved last before
            plotting (presumably channels-first, as produced by the loaders
            in this notebook). Values are mapped through ``x / 2 + 0.5`` first,
            which undoes a mean-0.5 / std-0.5 normalization.
    """
    unnormalized = img / 2 + 0.5
    # matplotlib wants the colour axis last, so move axis 0 to the end.
    axes_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(axes_last)
    plt.show()

# Label index -> class name, taken from the CIFAR-10 dataset behind the loader
# (`classes` was never defined anywhere in the notebook).
classes = test_loader.dataset.classes

dataiter = iter(test_loader)
# Use the built-in next(): DataLoader iterators in current PyTorch have no .next() method.
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# Iterate over the actual batch size instead of assuming four images.
print('GroundTruth:', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

# Inference only: disable Dropout and skip gradient bookkeeping.
model.eval()
with torch.no_grad():
    outputs = model(images)
_, predicted = torch.max(outputs, 1)

print('Predicted:', ' '.join('%5s' % classes[predicted[j]] for j in range(len(labels))))


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Test-time preprocessing: tensor conversion followed by (x - 0.5) / 0.5 per channel.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 test split, served in order, four images per batch.
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=test_transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False)

correct = 0
total = 0

# Evaluation mode disables Dropout; otherwise the accuracy is measured on a randomly thinned network.
model.eval()
with torch.no_grad():
    # Unpack each batch directly so the loop does not rebind the name `data`
    # (the imports cell uses it as an alias for torch.utils.data).
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score per sample is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy * 100:.2f}%')

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    """Show an image tensor with matplotlib.

    Args:
        img: tensor with three axes whose first axis is moved last before
            plotting (presumably channels-first, as produced by the loaders
            in this notebook). Values are mapped through ``x / 2 + 0.5`` first,
            which undoes a mean-0.5 / std-0.5 normalization.
    """
    unnormalized = img / 2 + 0.5
    # matplotlib wants the colour axis last, so move axis 0 to the end.
    axes_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(axes_last)
    plt.show()

# Label index -> class name, taken from the CIFAR-10 dataset behind the loader
# (`classes` was never defined anywhere in the notebook).
classes = test_loader.dataset.classes

dataiter = iter(test_loader)
# Use the built-in next(): DataLoader iterators in current PyTorch have no .next() method.
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# Iterate over the actual batch size instead of assuming four images.
print('GroundTruth:', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

# Inference only: disable Dropout and skip gradient bookkeeping.
model.eval()
with torch.no_grad():
    outputs = model(images)
_, predicted = torch.max(outputs, 1)

print('Predicted:', ' '.join('%5s' % classes[predicted[j]] for j in range(len(labels))))


## Attack!

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from advertorch.attacks import PGDAttack

# Test-time preprocessing: tensor conversion followed by (x - 0.5) / 0.5 per channel.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 test split, served in order, one image per batch.
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=test_transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

# Attack and evaluate the deterministic network: Dropout must be off, otherwise both the
# attack gradients and the reported accuracy are randomized.
model.eval()

# test_transform maps pixels to [-1, 1], so that is the valid range the attack has to clip to;
# clipping to [0, 1] would overwrite every pixel below mid-gray with 0.
# eps / eps_iter are consequently expressed in normalized units
# (0.03 here corresponds to 0.015 on the original [0, 1] pixel scale).
# nb_iter=10 matches the notebook title (PGD-10).
pgd_attack = PGDAttack(model, eps=0.03, nb_iter=10, eps_iter=0.01, rand_init=True, clip_min=-1.0, clip_max=1.0)

correct_adv = 0
total_adv = 0

for images, labels in test_loader:
    # PGD back-propagates the loss to the input, so it has to run with autograd enabled;
    # calling it inside torch.no_grad() leaves the attack without gradients.
    adv_images = pgd_attack.perturb(images, labels)

    # Only the final prediction on the adversarial batch can skip gradient tracking.
    with torch.no_grad():
        outputs = model(adv_images)
    _, predicted = torch.max(outputs, 1)

    total_adv += labels.size(0)
    correct_adv += (predicted == labels).sum().item()

accuracy_adv = correct_adv / total_adv
print(f'Adversarial Test Accuracy: {accuracy_adv * 100:.2f}%')


## Visualize:

In [None]:

def visualize_adversarial_examples(model, test_loader, pgd_attack, num_examples=5, class_names=None):
    """Show clean and adversarial images side by side with the model's predictions.

    For each of the first `num_examples` batches of `test_loader`, the clean batch
    and its adversarial counterpart are displayed with `imshow`, and the ground-truth
    and predicted class names are printed.

    Args:
        model: classifier to query; it is switched to evaluation mode.
        test_loader: iterable yielding `(images, labels)` batches.
        pgd_attack: object whose `perturb(images, labels)` returns adversarial images.
        num_examples: maximum number of batches to show; fewer are shown if the
            loader runs out first.
        class_names: optional sequence mapping label index to name. Defaults to
            `test_loader.dataset.classes`.
    """
    if class_names is None:
        class_names = test_loader.dataset.classes

    def _names(indices):
        # Join the names of every item in the batch (a single name when batch size is 1).
        return ' '.join(class_names[i] for i in indices.tolist())

    model.eval()

    # islice stops cleanly at the end of the loader and avoids the iterator's
    # .next() method, which current PyTorch no longer provides.
    for images, labels in itertools.islice(test_loader, num_examples):
        # The attack needs gradients, so it stays outside torch.no_grad().
        adv_images = pgd_attack.perturb(images, labels)

        with torch.no_grad():
            _, predicted_original = torch.max(model(images), 1)
            _, predicted_adversarial = torch.max(model(adv_images), 1)

        imshow(torchvision.utils.make_grid(images))
        print('Original - GroundTruth:', _names(labels), ' Predicted:', _names(predicted_original))

        imshow(torchvision.utils.make_grid(adv_images))
        print('Adversarial - Predicted:', _names(predicted_adversarial))

visualize_adversarial_examples(model, test_loader, pgd_attack, num_examples=5)


## Evaluation:

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def register_hooks(model, layer_name):
    """Attach a forward hook that records the outputs of one sub-module.

    Args:
        model: the `nn.Module` that contains the layer.
        layer_name: name of the sub-module as reported by `model.named_modules()`;
            this covers direct children (``'features'``) as well as dotted paths
            to nested layers (``'features.7'``).

    Returns:
        A `(features, hook)` pair: `features` is a list that receives one entry per
        forward pass, and `hook` is the handle whose `remove()` detaches the hook.

    Raises:
        KeyError: if `model` has no sub-module called `layer_name`.
    """
    # named_modules() walks the whole module tree; named_children() only lists the
    # top level and therefore cannot resolve dotted names.
    modules = dict(model.named_modules())
    if layer_name not in modules:
        raise KeyError(f"model has no sub-module named {layer_name!r}")

    features = []

    def hook_fn(module, input, output):
        # Store a detached copy: a following in-place layer (e.g. ReLU(inplace=True))
        # would otherwise overwrite the recorded tensor, and the copy does not keep
        # the autograd graph alive.
        if isinstance(output, torch.Tensor):
            output = output.detach().clone()
        features.append(output)

    hook = modules[layer_name].register_forward_hook(hook_fn)

    return features, hook

def plot_feature_maps(feature_maps, num_images=5, channel=None):
    """Plot one heat map per recorded feature tensor.

    Args:
        feature_maps: sequence of tensors; the first item along axis 0 of each
            tensor is plotted (presumably the first sample of a batch).
        num_images: maximum number of entries of `feature_maps` to plot; fewer
            are plotted if the sequence is shorter.
        channel: for entries that still have three axes after selecting the first
            item, the index along the leading axis to show. When None, the mean
            over that axis is shown instead.
    """
    # Never index past the end of the recorded list.
    for i in range(min(num_images, len(feature_maps))):
        feature_map = feature_maps[i][0].cpu().detach().numpy()
        suffix = ''
        if feature_map.ndim == 3:
            # plt.imshow cannot draw an arbitrary 3-axis array as a heat map,
            # so reduce it to two axes first.
            if channel is None:
                feature_map = feature_map.mean(axis=0)
                suffix = ' (mean over channels)'
            else:
                feature_map = feature_map[channel]
                suffix = f' (channel {channel})'
        elif feature_map.ndim == 1:
            # A flat vector is drawn as a single-row image.
            feature_map = feature_map[np.newaxis, :]
        plt.figure(figsize=(15, 3))
        plt.imshow(feature_map, cmap='viridis', aspect='auto')
        plt.title(f'Feature Map for Image {i + 1}{suffix}')
        plt.colorbar()
        plt.show()

selected_layer_name = 'features.7'
num_images_to_plot = 5

# Record deterministic activations (Dropout off).
model.eval()

features, hook = register_hooks(model, selected_layer_name)
try:
    with torch.no_grad():
        # Only the first few batches are plotted, so only those are pushed through
        # the model; running the whole test set would store every activation.
        for images, _ in itertools.islice(test_loader, num_images_to_plot):
            model(images)
finally:
    # Always detach the hook, even if the forward pass fails, so it does not
    # keep recording during later cells.
    hook.remove()

plot_feature_maps(features, num_images=num_images_to_plot)


# Running on a pre-trained model: