In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.autograd import grad
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2
from torch.utils.data import Dataset, DataLoader

In [2]:
class Args:
    """Plain container for the hyper-parameters used throughout the notebook.

    Attributes
    ----------
    batch_size : training batch size.
    test_batch_size : testing batch size.
    lr, momentum : settings handed to the SGD optimizer.
    epochs : number of passes over the training set.
    log_interval : number of batches between two progress lines.
    norm_order : order ``p`` of the norm taken over the input gradient.
    alpha : weight of the input-gradient norm term in the training loss.
    """

    def __init__(
        self,
        batch_size=64,
        test_batch_size=1000,
        lr=0.1,
        momentum=0.5,
        epochs=10,
        log_interval=10000,
        alpha=0.005,
        norm_order=1,
    ):
        # Data loading.
        self.batch_size = batch_size
        self.test_batch_size = test_batch_size
        # Optimisation schedule and logging.
        self.lr = lr
        self.momentum = momentum
        self.epochs = epochs
        self.log_interval = log_interval
        # Input-gradient regularisation.
        self.norm_order = norm_order
        self.alpha = alpha


# Notebook-wide configuration instance, built with the defaults above.
args = Args()

In [3]:
class Net(nn.Module):
    """Small CNN classifier: two conv + max-pool stages, then two linear layers.

    ``forward`` flattens the second pooled feature map to ``4*4*50`` values per
    sample, which is what a single-channel 28x28 input produces
    (28 -> 24 -> 12 -> 8 -> 4). The output holds 10 log-probabilities per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layers are created in the same order as they are applied.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)
        flat = features.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [4]:
# MNIST loaders: both splits are converted to tensors and shuffled; the train
# loader uses args.batch_size and the test loader uses args.test_batch_size.
kwargs = {
    'num_workers': 1,
    'pin_memory': True,
}
train_loader = DataLoader(
    datasets.MNIST('../data', train=True, transform=transforms.ToTensor()),
    shuffle=True,
    batch_size=args.batch_size,
    **kwargs)
test_loader = DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
    shuffle=True,
    batch_size=args.test_batch_size,
    **kwargs)

In [7]:
# blurred dataset
# test_dataset_original = datasets.MNIST('../data', train=False)

In [5]:
# Build the network, move it to the GPU, and attach an SGD optimizer whose
# learning rate and momentum come from `args`.
model = Net()
model.to("cuda")
optimizer = optim.SGD(
    model.parameters(),
    momentum=args.momentum,
    lr=args.lr)

In [9]:
def derivative_heatmap(model, dataset, idx):
    """Return the absolute gradient of the NLL loss w.r.t. one input sample.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose output is passed to ``F.nll_loss``.
    dataset : indexable
        ``dataset[idx]`` must yield ``(image, label)``: ``image`` is a tensor
        without the batch dimension, ``label`` is a class index given either
        as a plain integer or as a tensor holding exactly one element.
    idx : int
        Position of the sample inside ``dataset``.

    Returns
    -------
    numpy.ndarray
        ``|d loss / d image|`` with every size-1 dimension squeezed out.
    """
    # Index the dataset once instead of once per field.
    image, label = dataset[idx]

    # Follow the model's device rather than assuming CUDA; a model without
    # parameters falls back to CUDA, as before.
    param = next(model.parameters(), None)
    device = param.device if param is not None else torch.device("cuda")

    # detach() guarantees a fresh leaf, so requesting gradients never touches
    # the tensor stored in the dataset.
    data = image.unsqueeze(0).to(device).detach().requires_grad_(True)
    # as_tensor accepts both int labels and tensor labels; reshape(1) gives
    # the batch-of-one target that nll_loss expects.
    target = torch.as_tensor(label, dtype=torch.long).reshape(1).to(device)

    loss = F.nll_loss(model(data), target)
    input_grad = torch.autograd.grad(loss, data, create_graph=False)[0]

    #plt.imshow(abs(grad), cmap='hot', interpolation='nearest')
    #plt.colorbar()
    return np.abs(input_grad.squeeze().detach().cpu().numpy())

In [29]:
def train_original(args, model, train_loader, optimizer, epoch):
    """Run one epoch of plain NLL training (no gradient penalty).

    Parameters
    ----------
    args : object exposing ``log_interval`` (batches between progress lines).
    model : torch.nn.Module whose output is passed to ``F.nll_loss``.
    train_loader : iterable of ``(data, target)`` batches; it must support
        ``len()`` and expose ``.dataset`` because both appear in the log line.
    optimizer : optimizer over ``model``'s parameters.
    epoch : value shown in the progress line; it does not affect training.
    """
    model.train()
    # Move batches to wherever the model lives instead of hard-coding CUDA;
    # a model without parameters falls back to CUDA, as before.
    param = next(model.parameters(), None)
    device = param.device if param is not None else torch.device("cuda")

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [17]:
def train(args, model, train_loader, optimizer, epoch):
    """Run one epoch of NLL training with an input-gradient norm penalty.

    For each batch the optimised objective is
    ``nll + args.alpha * torch.norm(d nll / d data, p=args.norm_order)``,
    where the norm is taken over the gradient tensor of the whole batch.

    Parameters
    ----------
    args : object exposing ``alpha``, ``norm_order`` and ``log_interval``.
    model : torch.nn.Module whose output is passed to ``F.nll_loss``.
    train_loader : iterable of ``(data, target)`` batches; it must support
        ``len()`` and expose ``.dataset`` because both appear in the log line.
    optimizer : optimizer over ``model``'s parameters.
    epoch : value shown in the progress line; it does not affect training.
    """
    model.train()
    # Move batches to wherever the model lives instead of hard-coding CUDA;
    # a model without parameters falls back to CUDA, as before.
    param = next(model.parameters(), None)
    device = param.device if param is not None else torch.device("cuda")

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # The input must track gradients so d(loss)/d(data) can be taken.
        data.requires_grad = True
        output = model(data)
        loss = F.nll_loss(output, target)
        # create_graph=True keeps the gradient differentiable, so the penalty
        # below contributes to the parameter gradients.
        input_grad = torch.autograd.grad(loss, data, create_graph=True)[0]
        adv_loss = torch.norm(input_grad, p=args.norm_order)
        new_loss = loss + args.alpha * adv_loss
        new_loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            # The printed value is the plain NLL term, not the penalised objective.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [10]:
def test(args, model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [31]:
# Baseline run: train without the gradient penalty and, after every epoch,
# evaluate on the test set and record the input-gradient heatmap of one fixed
# test sample.
# NOTE(review): `map` rebinds the builtin of the same name, and `grad` rebinds
# the `grad` imported from torch.autograd at the top of the notebook.
idx = 1   # index of the test sample whose heatmap is tracked
map = []  # one heatmap per epoch, in epoch order
for epoch in range(1, 1 + args.epochs):
    train_original(args, model, train_loader, optimizer, epoch)
    test(args, model, test_loader)
    grad = derivative_heatmap(model, test_loader.dataset, idx)
    map.append(grad)


Test set: Average loss: 0.0643, Accuracy: 9792/10000 (98%)


Test set: Average loss: 0.0377, Accuracy: 9884/10000 (99%)


Test set: Average loss: 0.0296, Accuracy: 9901/10000 (99%)


Test set: Average loss: 0.0315, Accuracy: 9902/10000 (99%)


Test set: Average loss: 0.0248, Accuracy: 9919/10000 (99%)


Test set: Average loss: 0.0422, Accuracy: 9869/10000 (99%)


Test set: Average loss: 0.0253, Accuracy: 9924/10000 (99%)


Test set: Average loss: 0.0248, Accuracy: 9931/10000 (99%)


Test set: Average loss: 0.0286, Accuracy: 9921/10000 (99%)


Test set: Average loss: 0.0281, Accuracy: 9927/10000 (99%)



In [1]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    """Take one FGSM step on ``image`` and clip the result to ``[0, 1]``.

    Every element of ``image`` is shifted by ``epsilon`` in the direction of
    the sign of the matching element of ``data_grad``.
    """
    step_direction = torch.sign(data_grad)
    shifted = image + epsilon * step_direction
    # Clamp so the perturbed values stay inside the [0, 1] range.
    return shifted.clamp(0, 1)

In [None]:
def test(model, test_loader, epsilon):
    """Measure accuracy under an FGSM attack of strength ``epsilon``.

    NOTE: this reuses the name ``test`` of the clean-accuracy evaluation
    defined earlier in the notebook, with a different signature. Once this
    cell has run, the earlier function is no longer reachable under that name.

    Samples the model already misclassifies are not attacked; they count as
    errors. The model's train/eval mode is left as the caller set it.

    Parameters
    ----------
    model : torch.nn.Module whose output is passed to ``F.nll_loss``.
    test_loader : iterable of ``(data, target)`` batches of any batch size.
    epsilon : step size handed to ``fgsm_attack``.

    Returns
    -------
    (final_acc, adv_examples)
        ``final_acc`` is the fraction of all samples still classified
        correctly after the attack (``0.0`` for an empty loader).
        ``adv_examples`` holds at most five
        ``(initial_prediction, final_prediction, perturbed_image_as_numpy)``
        tuples: successful attacks, or, when ``epsilon == 0``, every attacked
        sample.
    """
    correct = 0
    total = 0
    adv_examples = []

    # Move batches to wherever the model lives; a model without parameters
    # falls back to CUDA.
    param = next(model.parameters(), None)
    device = param.device if param is not None else torch.device("cuda")

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        total += target.size(0)
        data.requires_grad = True
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1].view(-1) # get the index of the max log-probability

        # If the initial prediction is wrong, dont bother attacking, just move on
        attackable = init_pred.eq(target.view(-1))
        if not attackable.any():
            continue

        # Summed loss: each sample's input gradient is independent of batch size
        loss = F.nll_loss(output, target, reduction='sum')

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = data.grad.detach()

        # Call FGSM Attack
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        output = model(perturbed_data)
        final_pred = output.max(1, keepdim=True)[1].view(-1) # get the index of the max log-probability

        # Check for success
        survived = attackable & final_pred.eq(target.view(-1))
        fooled = attackable & final_pred.ne(target.view(-1))
        correct += int(survived.sum().item())

        # Save some examples for visualization later: with epsilon == 0 every
        # attacked sample qualifies, otherwise only the successful attacks.
        keep = attackable if epsilon == 0 else fooled
        if len(adv_examples) < 5:
            for i in torch.nonzero(keep).view(-1).tolist():
                if len(adv_examples) >= 5:
                    break
                adv_ex = perturbed_data[i].squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred[i].item(), final_pred[i].item(), adv_ex) )

    # Calculate final accuracy for this epsilon (per sample, not per batch)
    final_acc = correct/float(total) if total else 0.0
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, final_acc))

    # Return the accuracy and an adversarial example
    return final_acc, adv_examples