# Breaking Defenses & Black-Box Attacks

In [1]:
import torch
from torch import nn
from torch.optim import Adam
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader

from torchvision import transforms
from torchvision.models import resnet18, mobilenet_v2
from torchvision.datasets.cifar import CIFAR10

from tqdm import trange, tqdm

torch.manual_seed(0)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

# CIFAR10 Dataset (5 points)

In [2]:
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)
batch_size = 128

mu = torch.tensor(norm_mean).view(3,1,1).to(device)
std = torch.tensor(norm_std).view(3,1,1).to(device)

# TODO: Set the upper limit and lower limit possible for images
upper_limit = (1 - mu) / std
lower_limit = (0 - mu) / std

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')


100%|██████████| 170M/170M [00:05<00:00, 28.8MB/s]


# Defensive Distillation (25 points)

[Defensive distillation](https://arxiv.org/abs/1511.04508) proceeds in four steps:

1.   **Train the teacher network**, by setting the temperature of the softmax to T during the
training phase.
2.   **Compute soft labels** by apply the teacher network to each instance in the training set, again evaluating the softmax at temperature T.
3.  **Train the distilled network** (a network with the same shape as the teacher network) on the soft labels, using softmax at temperature T.
4.  Finally, when running the distilled network at test time to classify new inputs, use temperature 1.



## Train the teacher

In [3]:
def train_step(model, dataloader, loss_fn, optimizer, temperature):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        logits = model(x) / temperature
        loss = loss_fn(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)

    acc = 100 * correct / total
    return total_loss / total, acc


def train_teacher(model, n_epochs, loader=trainloader, temp=100):
    model.to(device)
    optimizer = Adam(model.parameters(), lr=1e-3)
    loss_fn = CrossEntropyLoss()

    for epoch in range(n_epochs):
        loss, acc = train_step(model, loader, loss_fn, optimizer, temp)
        print(f"[Teacher] Epoch {epoch+1}/{n_epochs} | Loss: {loss:.4f} | Acc: {acc:.2f}%")


You can use a pre-trained resnet to speed up the training process.

In [4]:
teacher = resnet18(pretrained=True)
teacher.fc = nn.Linear(512, 10)
train_teacher(teacher, 15)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 71.4MB/s]


[Teacher] Epoch 1/15 | Loss: 1.4461 | Acc: 62.78%
[Teacher] Epoch 2/15 | Loss: 0.8037 | Acc: 74.28%
[Teacher] Epoch 3/15 | Loss: 0.6556 | Acc: 78.26%
[Teacher] Epoch 4/15 | Loss: 0.5899 | Acc: 79.99%
[Teacher] Epoch 5/15 | Loss: 0.5396 | Acc: 81.75%
[Teacher] Epoch 6/15 | Loss: 0.5101 | Acc: 82.67%
[Teacher] Epoch 7/15 | Loss: 0.4751 | Acc: 83.79%
[Teacher] Epoch 8/15 | Loss: 0.4463 | Acc: 84.79%
[Teacher] Epoch 9/15 | Loss: 0.4335 | Acc: 85.26%
[Teacher] Epoch 10/15 | Loss: 0.4090 | Acc: 85.77%
[Teacher] Epoch 11/15 | Loss: 0.3944 | Acc: 86.52%
[Teacher] Epoch 12/15 | Loss: 0.3804 | Acc: 86.95%
[Teacher] Epoch 13/15 | Loss: 0.3663 | Acc: 87.45%
[Teacher] Epoch 14/15 | Loss: 0.3540 | Acc: 87.73%
[Teacher] Epoch 15/15 | Loss: 0.3411 | Acc: 88.14%


## Test the teacher

In [5]:
def test_clean(model, dataloader=testloader):
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    return 100 * correct / total

Print the clean accuracy of the teacher.

In [6]:
print(f'Teacher Accuracy {test_clean(teacher):.2f}%')

Teacher Accuracy 84.32%


## Train the student

In [7]:
def distill(model, teacher, dataloader, optimizer, T):
    model.train()
    teacher.eval()

    kl = nn.KLDivLoss(reduction="batchmean")
    total_loss, correct, total = 0, 0, 0

    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        with torch.no_grad():
            teacher_logits = teacher(x) / T
            soft_targets = F.softmax(teacher_logits, dim=1)

        student_logits = model(x) / T
        log_probs = F.log_softmax(student_logits, dim=1)

        loss = kl(log_probs, soft_targets) * (T * T)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)

        preds = model(x).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)

    acc = 100 * correct / total
    return total_loss / total, acc


def train_student(model, teacher, n_epochs, loader=trainloader, temp=100):
    model.to(device)
    teacher.to(device)
    optimizer = Adam(model.parameters(), lr=1e-3)

    for epoch in range(n_epochs):
        loss, acc = distill(model, teacher, loader, optimizer, temp)
        print(f"[Student] Epoch {epoch+1}/{n_epochs} | Loss: {loss:.4f} | Acc: {acc:.2f}%")


This time use a `resnet18` without the pretrained weights.

In [8]:
student = resnet18(weights=None)
student.fc = nn.Linear(512, 10)
train_student(student, teacher, 15)

[Student] Epoch 1/15 | Loss: 13687.5224 | Acc: 40.46%
[Student] Epoch 2/15 | Loss: 8849.4277 | Acc: 58.35%
[Student] Epoch 3/15 | Loss: 6884.8912 | Acc: 65.99%
[Student] Epoch 4/15 | Loss: 5613.3054 | Acc: 71.01%
[Student] Epoch 5/15 | Loss: 4868.9447 | Acc: 73.62%
[Student] Epoch 6/15 | Loss: 4268.5552 | Acc: 75.90%
[Student] Epoch 7/15 | Loss: 3838.2570 | Acc: 77.63%
[Student] Epoch 8/15 | Loss: 3498.4464 | Acc: 78.94%
[Student] Epoch 9/15 | Loss: 3223.7420 | Acc: 79.68%
[Student] Epoch 10/15 | Loss: 3049.4182 | Acc: 80.55%
[Student] Epoch 11/15 | Loss: 2884.4784 | Acc: 81.21%
[Student] Epoch 12/15 | Loss: 2656.5752 | Acc: 81.92%
[Student] Epoch 13/15 | Loss: 2555.0217 | Acc: 82.31%
[Student] Epoch 14/15 | Loss: 2477.2885 | Acc: 82.75%
[Student] Epoch 15/15 | Loss: 2388.5061 | Acc: 83.09%


## Test the student

In [9]:
print(f'Student Accuracy {test_clean(student):.2f}%')

Student Accuracy 81.01%


# Attack (15 points)

Implement the FGSM attack and the `test_attack` funcion to report the robust accuracy for different values of epsilon.

In [18]:
def attack_fgsm(model, x, y, epsilon):
    model.eval()

    x_adv = x.clone().detach().requires_grad_(True)

    logits = model(x_adv) / 100
    loss = F.cross_entropy(logits, y)
    loss.backward()

    grad_sign = x_adv.grad.sign()
    x_adv = x_adv + epsilon * grad_sign

    x_adv = torch.max(torch.min(x_adv, upper_limit), lower_limit)

    return x_adv.detach()


def attack_pgd(model, x, y, epsilon, alpha=0.2, num_iters=10):
    model.eval()

    x_adv = x.clone().detach()
    x_adv = x_adv + torch.empty_like(x_adv).uniform_(-epsilon, epsilon)
    x_adv = torch.max(torch.min(x_adv, upper_limit), lower_limit)

    for _ in range(num_iters):
        x_adv.requires_grad_(True)

        logits = model(x_adv) / 100
        loss = F.cross_entropy(logits, y)
        loss.backward()

        grad_sign = x_adv.grad.sign()
        x_adv = x_adv + alpha * grad_sign

        x_adv = torch.max(torch.min(x_adv, x + epsilon), x - epsilon)

        x_adv = torch.max(torch.min(x_adv, upper_limit), lower_limit)

        x_adv = x_adv.detach()

    return x_adv


def test_attack(model, epsilon, atttack=attack_fgsm, loader=testloader):
    model.eval()
    correct, total = 0, 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        x_adv = atttack(model, x, y, epsilon)

        with torch.no_grad():
            preds = model(x_adv).argmax(dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

    return 100 * correct / total

Report the robust accuracy of the teacher for `ϵ = [1, 2, 4, 8, 16]`.

In [11]:
epsilons = [1, 2, 4, 8, 16]
scale = 1/std.mean().item()

for eps in epsilons:
    acc = test_attack(teacher, eps*scale/255, attack_fgsm)
    print(f'FGSM with ϵ={eps}/255 has Accuracy: {acc:.2f}%')
    acc = test_attack(teacher, eps*scale/255, attack_pgd)
    print(f'PGD  with ϵ={eps}/255 has Accuracy: {acc:.2f}%')

FGSM with ϵ=1/255 has Accuracy: 56.20%
PGD  with ϵ=1/255 has Accuracy: 54.72%
FGSM with ϵ=2/255 has Accuracy: 35.51%
PGD  with ϵ=2/255 has Accuracy: 32.80%
FGSM with ϵ=4/255 has Accuracy: 15.07%
PGD  with ϵ=4/255 has Accuracy: 12.63%
FGSM with ϵ=8/255 has Accuracy: 4.22%
PGD  with ϵ=8/255 has Accuracy: 0.19%
FGSM with ϵ=16/255 has Accuracy: 2.45%
PGD  with ϵ=16/255 has Accuracy: 0.00%


Do the same for the student:

In [14]:
for eps in epsilons:
    acc = test_attack(student, eps*scale/255, attack_fgsm)
    print(f'FGSM with ϵ={eps}/255 has Accuracy: {acc:.2f}%')
    acc = test_attack(student, eps*scale/255, attack_pgd)
    print(f'PGD  with ϵ={eps}/255 has Accuracy: {acc:.2f}%')

FGSM with ϵ=1/255 has Accuracy: 73.09%
PGD  with ϵ=1/255 has Accuracy: 72.58%
FGSM with ϵ=2/255 has Accuracy: 72.53%
PGD  with ϵ=2/255 has Accuracy: 72.19%
FGSM with ϵ=4/255 has Accuracy: 72.38%
PGD  with ϵ=4/255 has Accuracy: 71.89%
FGSM with ϵ=8/255 has Accuracy: 72.33%
PGD  with ϵ=8/255 has Accuracy: 71.57%
FGSM with ϵ=16/255 has Accuracy: 72.31%
PGD  with ϵ=16/255 has Accuracy: 69.12%


What do you see?

The student’s accuracy does not drop much under attack and FGSM and PGD perform similarly which indicates gradient masking rather than true robustness

# Transferring Adversarial Examples (15 points)

Train yet another model to be used as the surrogate. (set temperature to 1)

In [15]:
surrogate = resnet18(weights=None)
surrogate.fc = nn.Linear(512, 10)
train_teacher(surrogate, 10, temp=1)

[Teacher] Epoch 1/10 | Loss: 1.5610 | Acc: 43.08%
[Teacher] Epoch 2/10 | Loss: 1.1894 | Acc: 57.54%
[Teacher] Epoch 3/10 | Loss: 1.0183 | Acc: 63.97%
[Teacher] Epoch 4/10 | Loss: 0.9176 | Acc: 67.89%
[Teacher] Epoch 5/10 | Loss: 0.8460 | Acc: 70.33%
[Teacher] Epoch 6/10 | Loss: 0.7858 | Acc: 72.65%
[Teacher] Epoch 7/10 | Loss: 0.7417 | Acc: 74.24%
[Teacher] Epoch 8/10 | Loss: 0.6960 | Acc: 75.87%
[Teacher] Epoch 9/10 | Loss: 0.6635 | Acc: 77.02%
[Teacher] Epoch 10/10 | Loss: 0.6417 | Acc: 77.55%


Print the surrogate accuracy.

In [16]:
print(f'Surrogate Accuracy {test_clean(surrogate):.2f}%')

Surrogate Accuracy 77.05%


Report the accuracy of the surrogate for `ϵ = [1, 2, 4, 8, 16]`.

In [19]:
for eps in epsilons:
    acc = test_attack(surrogate, eps*scale/255, attack_fgsm)
    print(f'FGSM with ϵ={eps}/255 has Accuracy: {acc:.2f}%')

FGSM with ϵ=1/255 has Accuracy: 61.33%
FGSM with ϵ=2/255 has Accuracy: 45.28%
FGSM with ϵ=4/255 has Accuracy: 22.58%
FGSM with ϵ=8/255 has Accuracy: 5.78%
FGSM with ϵ=16/255 has Accuracy: 1.13%


Implement the following functions to transfer attacks from a surrogate model to an oracle.

In [21]:
def transfer_attack(oracle, model, eps, loader=testloader):
    oracle.eval()
    correct, total = 0, 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        x_adv = attack_fgsm(model, x, y, eps)
        preds = oracle(x_adv).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)

    return 100 * correct / total

Transfer attacks for `ϵ = [1, 2, 4, 8, 16]` from your model to the student.

In [22]:
for eps in epsilons:
    acc = transfer_attack(student, surrogate, eps*scale/255)
    print(f'FGSM with ϵ={eps}/255 has Accuracy: {acc:.2f}%')

FGSM with ϵ=1/255 has Accuracy: 75.77%
FGSM with ϵ=2/255 has Accuracy: 69.70%
FGSM with ϵ=4/255 has Accuracy: 56.54%
FGSM with ϵ=8/255 has Accuracy: 33.67%
FGSM with ϵ=16/255 has Accuracy: 13.42%


- What can be inferred from these results?

DD does not provide real robustness and the model’s accuracy drops under adversarial attacks as epsilon increases

- How are the accuracies of the student and the surrogate under attack related?

They are closely related because adversarial examples transfer from the surrogate to the student and causing similar accuracy degradation

- Does Defensive Distillation obfuscate the gradients? Why?

Yes.
high T training smooths the gradients but this only masks them and does not prevent successful iterative or transfer attacks

# ZOO Based Black-Box Attacks (25 points)

Based on [Black-box Adversarial Attacks with Limited Queries and Information](https://arxiv.org/abs/1804.08598) you must first calculate the estimate of the graidents, and next attack the model based on your estimates.

In [23]:
def nes_gradient_estimate(model, x, y, epsilon, num_samples, sigma):
    model.eval()
    grad_est = torch.zeros_like(x)

    for _ in range(num_samples):
        u = torch.randn_like(x)
        u = u / torch.norm(u, p=2)

        x_pos = x + sigma * u
        x_neg = x - sigma * u

        with torch.no_grad():
            loss_pos = F.cross_entropy(model(x_pos), y)
            loss_neg = F.cross_entropy(model(x_neg), y)

        grad_est += (loss_pos - loss_neg) * u

    grad_est = grad_est / (2 * sigma * num_samples)
    return grad_est

I used 3 different things to estimate gradiant and all of them end up almost the same result. The bottom result is made with probabilities.

In [24]:
def partial_information_attack(model, x, y, epsilon, num_samples, sigma, num_steps, alpha):
    x_adv = x.clone().detach()

    for _ in range(num_steps):
        grad_est = nes_gradient_estimate(
            model, x_adv, y, epsilon, num_samples, sigma
        )

        x_adv = x_adv + alpha * grad_est.sign()

        x_adv = torch.max(torch.min(x_adv, x + epsilon), x - epsilon)

        x_adv = torch.max(torch.min(x_adv, upper_limit), lower_limit)

    return x_adv.detach()

Now run this attack on your models and report the results. (You **DON'T** need to run the attack for the entire test dataset as this will take a lot of time!)

In [37]:
def test_zoo_attack(model, epsilon, num_samples, sigma, num_steps, alpha, loader):
    model.eval()
    correct, total = 0, 0

    total_batches = len(loader)
    num_batches_to_use = max(1, int(0.5 * total_batches))

    for i, (x, y) in enumerate(loader):
        if i >= num_batches_to_use:
            break

        x, y = x.to(device), y.to(device)

        x_adv = partial_information_attack(
            model, x, y, epsilon,
            num_samples, sigma, num_steps, alpha
        )

        with torch.no_grad():
            preds = model(x_adv).argmax(dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

    return 100 * correct / total


In [40]:
epsilons = [1, 2, 4, 8, 16]

for eps in epsilons:
    acc = test_zoo_attack(model=surrogate, epsilon=eps*scale/255, num_samples=100, sigma=0.001, num_steps=10, alpha=0.1, loader=testloader)
    print(f'ZOO with ϵ={eps}/255 has Accuracy: {acc:.2f}%')

ZOO with ϵ=1/255 has Accuracy: 71.22%
ZOO with ϵ=2/255 has Accuracy: 68.46%
ZOO with ϵ=4/255 has Accuracy: 46.51%
ZOO with ϵ=5/255 has Accuracy: 14.38%
ZOO with ϵ=16/255 has Accuracy: 3.12%


# Adversarially Robust Distillation (15 points)

In this section we are going to test another type of distillation to see if this method is robust. This technique is [Adversarially Robust Distillation](https://arxiv.org/abs/1905.09747).



1.   We will try to distill a robsut teacher from [Robust Bench](https://robustbench.github.io/) onto a smaller architecture.
2.   We minimize the KL-Divergence between the logits of the student and teacher to ensure fidelity. (You can also incorporate the classification loss as mentioned in the paper but you can choose to ignore it as well)
3.   At each step of the distillation you will attack the student (you can use either FGSM or PGD) and find an adversarial example $X + \delta$ for data point $X$. Next you will minimize $t^2 \times \text{KL}(S(X+\delta), T(X))$ where $S$ and $T$ are the student and teacher networks respectively.



In [41]:
! pip install git+https://github.com/RobustBench/robustbench.git

Collecting git+https://github.com/RobustBench/robustbench.git
  Cloning https://github.com/RobustBench/robustbench.git to /tmp/pip-req-build-r8ifwdco
  Running command git clone --filter=blob:none --quiet https://github.com/RobustBench/robustbench.git /tmp/pip-req-build-r8ifwdco
  Resolved https://github.com/RobustBench/robustbench.git to commit 78fcc9e48a07a861268f295a777b975f25155964
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting autoattack@ git+https://github.com/fra31/auto-attack.git@a39220048b3c9f2cca9a4d3a54604793c68eca7e#egg=autoattack (from robustbench==1.1)
  Cloning https://github.com/fra31/auto-attack.git (to revision a39220048b3c9f2cca9a4d3a54604793c68eca7e) to /tmp/pip-install-ny6hf4fi/autoattack_6b2b9fdd07054b7a8195b0ae167c05cb
  Running command git clone --filter=blob:none --quiet https://github.com/fra31/auto-attack.git /tmp/pip-install-ny6hf4fi/autoattack_6b2b9fdd07054b7a8195b0ae167c05cb
  Running command git rev-parse -q --verify 'sha^a39220048b3c9f2c

In [42]:
from robustbench.utils import load_model

teacher = load_model(model_name='Gowal2021Improving_R18_ddpm_100m', dataset='cifar10', threat_model='Linf')

Downloading models/cifar10/Linf/Gowal2021Improving_R18_ddpm_100m.pt (gdrive_id=1-0EuCJashqSXEkkd1DOzFA4tH8KL2kim).


Downloading...
From (original): https://drive.google.com/uc?id=1-0EuCJashqSXEkkd1DOzFA4tH8KL2kim
From (redirected): https://drive.google.com/uc?id=1-0EuCJashqSXEkkd1DOzFA4tH8KL2kim&confirm=t&uuid=1c031c21-0c63-4b4f-8edd-e533e197bbba
To: /content/models/cifar10/Linf/Gowal2021Improving_R18_ddpm_100m.pt
100%|██████████| 50.3M/50.3M [00:00<00:00, 59.0MB/s]


In [43]:
def ard(student, teacher, dataloader, optimizer, eps, attack):
    student.train()
    teacher.eval()

    kl = nn.KLDivLoss(reduction="batchmean")
    total_loss, correct, total = 0, 0, 0

    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        x_adv = attack(student, x, y, eps)

        with torch.no_grad():
            teacher_logits = teacher(x)
            teacher_probs = F.softmax(teacher_logits, dim=1)

        student_logits = student(x_adv)
        student_log_probs = F.log_softmax(student_logits, dim=1)

        loss = kl(student_log_probs, teacher_probs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)

        preds = student(x).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)

    return total_loss / total, 100 * correct / total



def adv_train_student(model, teacher, n_epochs, eps=8/255, loader=trainloader):
    model.to(device)
    teacher.to(device)
    optimizer = Adam(model.parameters(), lr=1e-3)

    for epoch in range(n_epochs):
        loss, acc = ard(model, teacher, loader, optimizer, eps, attack_fgsm)
        print(f"[ARD] Epoch {epoch+1}/{n_epochs} | Loss: {loss:.4f} | Acc: {acc:.2f}%")

In [46]:
student = mobilenet_v2(weights=None)
student.classifier[1] = nn.Linear(student.last_channel, 10)

adv_train_student(student, teacher, n_epochs=10)

Epoch [1/15] - Loss: 2.8451, Accuracy: 16.92%
Epoch [2/15] - Loss: 2.3718, Accuracy: 21.08%
Epoch [3/15] - Loss: 2.2146, Accuracy: 22.87%
Epoch [4/15] - Loss: 2.1439, Accuracy: 24.91%
Epoch [5/15] - Loss: 2.0872, Accuracy: 26.43%
Epoch [6/15] - Loss: 2.0285, Accuracy: 26.01%
Epoch [7/15] - Loss: 1.9724, Accuracy: 28.95%
Epoch [8/15] - Loss: 1.9461, Accuracy: 29.67%
Epoch [9/15] - Loss: 1.9218, Accuracy: 31.42%
Epoch [10/15] - Loss: 1.8986, Accuracy: 30.88%
Epoch [11/15] - Loss: 1.8627, Accuracy: 32.76%
Epoch [12/15] - Loss: 1.8349, Accuracy: 33.54%
Epoch [13/15] - Loss: 1.8183, Accuracy: 32.97%
Epoch [14/15] - Loss: 1.7694, Accuracy: 34.61%
Epoch [15/15] - Loss: 1.7532, Accuracy: 33.85%


Now report the accuracy of the student on the test dataset.

In [48]:
print("Student Accuracy:", test_clean(student))

print("FGSM with ϵ=8/255 has Accuracy:", test_attack(student, 8/255, attack_fgsm))

print("PGD with ϵ=8/255 has Accuracy:", test_attack(student, 8/255, attack_pgd))


Student Accuracy: 27.63%
FGSM with ϵ=8/255 has Accuracy: 16.16%
PGD with ϵ=8/255 has Accuracy: 14.98%
