In [26]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LightSource
import torch.nn.functional as F

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# download MNIST training and testing datasets, then prepare corresponding dataloaders (batch size = 100)
mnist_train = datasets.MNIST("../data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST("../data", train=False, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(mnist_train, batch_size = 100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size = 100, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [22]:
# initialize the CNN architecture with 4 convolutional layers and 2 MLP layers for standard training
torch.manual_seed(0)

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)

model_cnn = nn.Sequential(nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
                          nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
                          Flatten(),
                          nn.Linear(7*7*64, 100), nn.ReLU(),
                          nn.Linear(100, 10)).to(device)

In [38]:
### Your task: complete the following function



def pgd(model, X, y, epsilon=0.1, alpha=0.02, num_iter=10):
    """Construct adversarial examples using PGD with TRADES"""

    model.train()

    delta = torch.zeros_like(X, requires_grad=True)
    criterion = nn.CrossEntropyLoss()

    pert = X.clone().detach()
    pert = pert.to(device)

    for _ in range(num_iter):
        output = model(pert + delta)
        loss =criterion(output, y)

        model.zero_grad()
        loss.backward()

        delta.data = (delta + alpha * delta.grad.detach().sign()).clamp(-epsilon, epsilon)
        delta.grad.zero_()

    perturbed_X = torch.clamp(pert + delta, 0, 1)
    return perturbed_X

In [48]:
#### Your task: complete the following functions

def epoch(loader, model, opt=None):
    """Standard training/evaluation epoch over the dataset"""
    criterion = nn.CrossEntropyLoss()
    running_loss = 0.0
    correct = 0.0
    total = 0.0

    if opt:
        model.train()
    else:
        model.eval()

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        if opt:
            opt.zero_grad()

        inputs.requires_grad_()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        if opt:
            loss.backward()
            opt.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = 100.0 * correct / total

    return epoch_loss, epoch_acc



In [50]:
def epoch_adv(loader, model, attack, opt=None, **kwargs):
    """Adversarial training/evaluation epoch over the dataset"""
    criterion = nn.CrossEntropyLoss()
    running_loss = 0.0
    correct = 0.0
    total = 0.0

    if opt:
        model.train()
    else:
        model.eval()

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        adv_inputs = attack(model, inputs, labels, **kwargs)

        if opt:
            opt.zero_grad()

        adv_inputs.requires_grad_()
        outputs = model(adv_inputs)
        loss = criterion(outputs, labels)

        if opt:
            loss.backward()
            opt.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = 100.0 * correct / total

    return epoch_loss, epoch_acc



In [51]:
# specify the optimizer as SGD

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# specify the optimizer as SGD
opt = optim.SGD(model_cnn.parameters(), lr=1e-1)

# standard training
for t in range(5):
    train_err, train_loss = epoch(train_loader, model_cnn, opt)
    test_err, test_loss = epoch(test_loader, model_cnn)
    adv_err, adv_loss = epoch_adv(test_loader, model_cnn, pgd)

    print(*("{:.6f}".format(i) for i in (train_err, test_err, adv_err)), sep="\t")

# save the standard trained model for further evaluation
torch.save(model_cnn.state_dict(), "model_cnn.pt")

0.000032	0.050911	3.712060
0.000029	0.051287	3.798647
0.000026	0.051819	3.795244
0.000023	0.052474	3.814792
0.000022	0.052841	3.860831


By the results of the standard training we can see how senstive the model attacks. The output suggests that we have very high adverserial error, which is expected, since the model is not trained on adverserial examples.

In [52]:
# use the same CNN architecture for robust training
model_cnn_robust = nn.Sequential(nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
                                 nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
                                 nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
                                 nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
                                 Flatten(),
                                 nn.Linear(7*7*64, 100), nn.ReLU(),
                                 nn.Linear(100, 10)).to(device)

In [53]:
# specify the optimizer as SGD
opt = optim.SGD(model_cnn_robust.parameters(), lr=1e-1)

# PGD-based adversarial training
for t in range(5):
    train_err, train_loss = epoch_adv(train_loader, model_cnn_robust, pgd, opt)
    test_err, test_loss = epoch(test_loader, model_cnn_robust)
    adv_err, adv_loss = epoch_adv(test_loader, model_cnn_robust, pgd)

    print(*("{:.6f}".format(i) for i in (train_err, test_err, adv_err)), sep="\t")

# save the standard trained model for further evaluation
torch.save(model_cnn_robust.state_dict(), "model_cnn_robust.pt")

1.583447	0.147872	0.332344
0.232554	0.060061	0.155951
0.149133	0.048194	0.138092
0.116772	0.037064	0.110379
0.097984	0.033818	0.101471


Here we can see that the output of adverserial error is decreased since model got more robust after training with adverserial examples. However, we can see that there is a difference in the outputs test error for standard training. There is slight increase in them, suggesting there might be a trade of between robustness and accuracy. But with more iteration that difference also disappears.


In [54]:
# load the standard trained and adversarially trained models
model_cnn.load_state_dict(torch.load("model_cnn.pt"))
model_cnn_robust.load_state_dict(torch.load("model_cnn_robust.pt"))

<All keys matched successfully>

In [55]:
def fgsm(model, X, y, epsilon=0.1):
    """ Construct FGSM adversarial examples for the example (X,y)"""
    delta = torch.zeros_like(X, requires_grad=True)
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()

In [56]:
# clean performance (no attack)
print("clean:", "{:.4f}".format(epoch(test_loader, model_cnn)[0]),
      "{:.4f}".format(epoch(test_loader, model_cnn_robust)[0]))

# evaluate both models using FGSM attack
print("FGSM: ", "{:.4f}".format(epoch_adv(test_loader, model_cnn, fgsm)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, fgsm)[0]))

# evaluate both models using PGD attack
print("PGD (10 iter):", "{:.4f}".format(epoch_adv(test_loader, model_cnn, pgd, num_iter=10)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, pgd, num_iter=10)[0]))

clean: 0.0528 0.0338
FGSM:  6.2064 2.3046
PGD (10 iter): 3.8608 0.1015


The evaluation of the models using two different attacks show, that the robust model is less sensitive to pgd attack but still shows high sensitivity to the FGSM attack. This suggests that the model is vulnerable against small perturbations used by FSGM Attack. However, we can also see that the error decreases for the model that was trained on adverserial examples.

In [None]:
#### Your task (bonus): develop an attack method to achieve an attack success rate as high as possible. You can modify the following function if needed.

# You can try out some of the attack methods introduced in Lectures 3-4 or develop your unique creative attack.
# In principle, the performance of your attack should be better than FGSM or PGD, 10 iter;
# The higher attack success rates you can achieve, the higher credits you may receive.

def my_attack(model, X, y, epsilon=0.1):
  """ Construct adversarial examples for the example (X,y)"""

  return

In [None]:
print("My Attack: ", "{:.4f}".format(epoch_adv(test_loader, model_cnn, my_attack)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, my_attack)[0]))