<a href="https://colab.research.google.com/github/GiuliaLanzillotta/exercises/blob/master/Adversarial_attacks_on_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Adversarial attacks on MNIST

Today we're going to do 2 things: 
- Train a naive CNN on MNIST
- Attack it with different techniques

Let's start!



In [None]:
# uncomment if not already installed  
# !pip install tensorboardX

In [4]:
# Imports + constants
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
from torchvision import datasets, transforms

# Run on the CPU by default; set `use_cuda` to True to select the CUDA device instead.
use_cuda = False
device = torch.device("cuda") if use_cuda else torch.device("cpu")
# Mini-batch size used by the DataLoaders created further down.
batch_size = 64

import matplotlib.pyplot as plt

# Reproducibility
# Seed both NumPy's and PyTorch's global random number generators with the same fixed value.
# Notice that complete reproducibility is not guaranteed anyway
# (for example because floating point addition is not perfectly associative).
# Look here for more: https://pytorch.org/docs/stable/notes/randomness.html
# torch.manual_seed is the last expression of the cell, so its return value
# (the torch Generator) is what the cell displays as output.
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f78b74c65a0>

## MNIST classifier

### The architecture

In [6]:
class Net(nn.Module):
    """Feed-forward MNIST classifier with a single hidden layer.

    The input is flattened to rows of 28*28 = 784 values, mapped to 200
    hidden units followed by a ReLU, and finally projected to 10 outputs
    (one logit per class; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(28 * 28, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        """Return the logits, one row of 10 values per flattened input image."""
        flat = x.view((-1, 28 * 28))
        hidden = F.relu(self.fc(flat))
        return self.fc2(hidden)

In [8]:
class ConvNet(nn.Module):
    """Pretty basic CNN classifier for MNIST images.

    Six convolutional layers with shrinking kernels (5, 5, 3, 3, 3, 1) and a
    growing number of channels (32, 32, 64, 64, 128, 128). Every convolution
    is followed by a ReLU and then batch normalization. Max-pooling plus 2D
    dropout is applied after the second and after the fourth convolution.
    Two fully connected layers form the classification head.
    """

    def __init__(self):
        super().__init__()
        # Convolutional trunk: each conv layer is paired with its batch norm.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5, 5))
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(5, 5))
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3, 3))
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=(3, 3))
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=(1, 1))
        self.bn6 = nn.BatchNorm2d(128)
        # A single dropout module (p=0.2) is reused after both pooling steps.
        self.conv2_drop = nn.Dropout2d(p=0.2)
        # Classification head: fc1 expects 128 flattened features, which is what
        # the trunk produces for 28x28 inputs (the last feature map is 128x1x1).
        self.fc1 = nn.Linear(128, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return 10 class logits for every image in the batch ``x``."""
        # Stage 1: two 5x5 convolutions, then pooling and dropout.
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(x)))
        x = self.conv2_drop(F.max_pool2d(x, 2))
        # Stage 2: two 3x3 convolutions, then pooling and dropout.
        x = self.bn3(F.relu(self.conv3(x)))
        x = self.bn4(F.relu(self.conv4(x)))
        x = self.conv2_drop(F.max_pool2d(x, 2))
        # Stage 3: a 3x3 and a 1x1 convolution.
        x = self.bn5(F.relu(self.conv5(x)))
        x = self.bn6(F.relu(self.conv6(x)))
        # Classification head: flatten everything but the batch dimension.
        _, channels, height, width = x.size()
        flat = x.view(-1, channels * height * width)
        return self.fc2(F.relu(self.fc1(flat)))


### The data 

In [None]:
# Two things happen here:
# 1. The MNIST dataset is downloaded (it already comes divided into train and test).
# 2. Every image is converted to a tensor and normalized with
#    mean 0.1307 and standard deviation 0.3081.
mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=mnist_transform)
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=mnist_transform)

In [10]:
# DataLoaders take care of batching, so we never iterate through the data by hand.
# Both use the batch_size defined above; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

### Training 

In [22]:
# Build the classifier and move it to the configured device
# (the move only has an effect if the model is not already there).
model = Net()
model = model.to(device)

# Switch to training mode. This only affects certain modules
# (e.g. Dropout, BatchNorm) which behave differently
# in train and test mode.
model.train()

Net(
  (fc): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=10, bias=True)
)

In [23]:
# Optimization hyper-parameters
learning_rate = 1e-4
num_epochs = 20

# Optimizer: SGD vs Adam (swap in the commented line to compare them)
#opt = optim.SGD(params=model.parameters(), lr=learning_rate)
opt = optim.Adam(model.parameters(), lr=learning_rate)

# Loss: cross-entropy, applied directly to the model outputs
ce_loss = nn.CrossEntropyLoss()

In [25]:
# Train `model` for `num_epochs` passes over `train_loader`,
# printing the batch accuracy every 100 steps and the wall-clock time per epoch.
tot_steps = 0  # number of optimizer updates performed so far
model.train()  # Dropout / BatchNorm must be in training mode while fitting
for epoch in range(1, num_epochs + 1):
    print("-------------- Epoch "+str(epoch)+"-------------")
    t1 = time.time()
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        tot_steps += 1
        opt.zero_grad()
        out = model(x_batch)
        batch_loss = ce_loss(out, y_batch)

        # show accuracy every 100 steps
        if batch_idx % 100 == 0:
            with torch.no_grad():  # the metric must not be tracked by autograd
                pred = out.argmax(dim=1)  # predictions
                # Divide by the actual number of samples in this batch: the last
                # batch of an epoch can be smaller than `batch_size`.
                acc = pred.eq(y_batch).sum().item() / float(y_batch.size(0))  # accuracy
            print("Batch "+str(batch_idx)+": "+ str(acc))

        batch_loss.backward()
        opt.step()
    t2 = time.time()
    print("Time = %.2lf seconds"%(t2-t1))

SyntaxError: ignored

In [None]:
# Evaluate on the test set
# Evaluation mode makes Dropout / BatchNorm layers behave deterministically,
# and no_grad avoids building the autograd graph, which is not needed here.
model.eval()
tot_test, tot_acc = 0.0, 0.0  # number of samples seen / number classified correctly
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        out = model(x_batch)
        pred = out.argmax(dim=1)
        tot_acc += pred.eq(y_batch).sum().item()
        tot_test += x_batch.size()[0]
# The format string is now actually applied: report the fraction of correct predictions.
print('Accuracy %.5lf ' % (tot_acc / tot_test))

## Attacks

In [None]:
# Reload the test set for the attacks.
# Note that this time we do not perform the normalization operation here
# (only ToTensor is applied), see the next cell.
test_dataset = datasets.MNIST(
    'mnist_data/',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
class Normalize(nn.Module):
    """Standardize the input as ``(x - mean) / std`` so it can be used as a network layer.

    Args:
        mean: value subtracted from every input element (default 0.1307).
        std: value the shifted input is divided by (default 0.3081).

    With the defaults, ``Normalize()`` computes exactly ``(x - 0.1307) / 0.3081``.
    The values are stored as plain attributes, not as parameters or buffers,
    so the layer adds nothing to the model's ``state_dict``.
    """

    def __init__(self, mean=0.1307, std=0.3081):
        super().__init__()
        self.mean = mean
        self.std = std

    def forward(self, x):
        """Return the standardized version of ``x`` (same shape as ``x``)."""
        return (x - self.mean) / self.std

# we load the body of the neural net trained with mnist_train.ipynb...
# NOTE(review): torch.load unpickles the file, which can run arbitrary code, so only
# load 'model.net' from a trusted source. Recent PyTorch releases may also refuse to
# load a fully pickled module unless `weights_only=False` is passed -- confirm against
# the installed version.
model = torch.load('model.net', map_location='cpu')

# ... and add the data normalization as a first "layer" to the network
# this allows us to search for adversarial examples to the real image, rather than
# to the normalized image
model = nn.Sequential(Normalize(), model)

# and here we also create a version of the model that outputs the class probabilities.
# dim=1 normalizes over the class dimension of the (batch, classes) output; leaving
# `dim` out relies on a deprecated implicit choice and triggers a warning.
model_to_prob = nn.Sequential(model, nn.Softmax(dim=1))

# we put the neural net into evaluation mode (this disables features like dropout)
model.eval()
model_to_prob.eval()

In [None]:
# define a show function for later
def show(original, adv, model_to_prob):
    """Plot an image next to its adversarial version and print both class distributions.

    Args:
        original: tensor holding one clean image (28*28 values); it is passed
            to ``model_to_prob`` unchanged.
        adv: tensor holding the adversarial image, same layout as ``original``.
        model_to_prob: callable mapping an image tensor to a tensor of class
            probabilities whose first row holds the 10 class scores.
    """
    p0 = model_to_prob(original).detach().numpy()
    p1 = model_to_prob(adv).detach().numpy()
    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(original.detach().numpy().reshape(28, 28), cmap='gray')
    axarr[0].set_title("Original, class: " + str(p0.argmax()))
    axarr[1].imshow(adv.detach().numpy().reshape(28, 28), cmap='gray')
    # The right-hand panel shows the adversarial image, so label it accordingly.
    axarr[1].set_title("Adversarial, class: " + str(p1.argmax()))
    print("Class\t\tOrig\tAdv")
    for i in range(10):
        # Index the single row explicitly: converting a 1-element array slice
        # (p0[:, i]) to float is deprecated in newer NumPy versions.
        print("Class {}:\t{:.2f}\t{:.2f}".format(i, float(p0[0, i]), float(p1[0, i])))

In [None]:
def _as_label_batch(label, batch_size, device):
    """Return `label` (int or tensor) as a 1-D int64 tensor with one entry per sample."""
    labels = torch.as_tensor(label, dtype=torch.long, device=device).reshape(-1)
    if labels.numel() == 1 and batch_size != 1:
        labels = labels.expand(batch_size)
    return labels


def _clip(x, clip_min, clip_max):
    """Clamp `x` to [clip_min, clip_max]; a bound that is None is not enforced."""
    if clip_min is None and clip_max is None:
        return x
    return torch.clamp(x, min=clip_min, max=clip_max)


def _signed_gradient_step(model, x, label, step_size, targeted):
    """Move `x` by `step_size` along the sign of the cross-entropy gradient w.r.t. the input."""
    with torch.enable_grad():  # works even if the caller is inside torch.no_grad()
        x_in = x.clone().detach().requires_grad_(True)
        labels = _as_label_batch(label, x_in.shape[0], x_in.device)
        loss = F.cross_entropy(model(x_in), labels)
        # Differentiate w.r.t. the input only, so no .grad is accumulated on the model.
        (grad,) = torch.autograd.grad(loss, x_in)
    # Targeted attacks descend the loss of the target class,
    # untargeted attacks ascend the loss of the true class.
    direction = -1.0 if targeted else 1.0
    return (x_in + direction * step_size * grad.sign()).detach()


def _pgd(model, x, label, k, eps, eps_step, targeted, clip_min, clip_max):
    """Run `k` signed-gradient steps, projecting onto the eps-ball around `x` after each one."""
    x_orig = x.clone().detach()
    x_adv = x_orig.clone()
    for _ in range(k):
        x_adv = _signed_gradient_step(model, x_adv, label, eps_step, targeted)
        # Projection: keep every element within eps of the original input ...
        x_adv = torch.min(torch.max(x_adv, x_orig - eps), x_orig + eps)
        # ... and inside the valid input range, if one was given.
        x_adv = _clip(x_adv, clip_min, clip_max)
    return x_adv


def fgsm_targeted(model, x, target, eps, clip_min=None, clip_max=None):
    """Fast Gradient Sign Method pushing `x` towards class `target`.

    Args:
        model: differentiable module mapping a batch of inputs to class logits.
        x: input batch; it is not modified.
        target: class index (int or tensor) the attack should produce.
        eps: size of the single step; no element moves by more than `eps`.
        clip_min, clip_max: optional bounds the result is clamped to.

    Returns:
        A detached tensor of the same shape as `x` holding the adversarial input.
    """
    x_adv = _signed_gradient_step(model, x, target, eps, targeted=True)
    return _clip(x_adv, clip_min, clip_max)


def fgsm_untargeted(model, x, label, eps, clip_min=None, clip_max=None):
    """Fast Gradient Sign Method pushing `x` away from its true class `label`.

    Takes the same arguments as `fgsm_targeted`, except that `label` is the class
    to move away from. Returns a detached tensor shaped like `x`.
    """
    x_adv = _signed_gradient_step(model, x, label, eps, targeted=False)
    return _clip(x_adv, clip_min, clip_max)


def pgd_targeted(model, x, target, k, eps, eps_step, clip_min=None, clip_max=None):
    """Projected Gradient Descent attack pushing `x` towards class `target`.

    Args:
        model: differentiable module mapping a batch of inputs to class logits.
        x: input batch; it is not modified. The attack starts from `x` itself
            (no random initialization), so the result is deterministic.
        target: class index (int or tensor) the attack should produce.
        k: number of gradient steps.
        eps: maximum change allowed for any element of `x`.
        eps_step: size of each individual step.
        clip_min, clip_max: optional bounds the iterates are clamped to.

    Returns:
        A detached tensor of the same shape as `x` holding the adversarial input.
    """
    return _pgd(model, x, target, k, eps, eps_step, True, clip_min, clip_max)


def pgd_untargeted(model, x, label, k, eps, eps_step, clip_min=None, clip_max=None):
    """Projected Gradient Descent attack pushing `x` away from its true class `label`.

    Takes the same arguments as `pgd_targeted`, except that `label` is the class
    to move away from. Returns a detached tensor shaped like `x`.
    """
    return _pgd(model, x, label, k, eps, eps_step, False, clip_min, clip_max)

In [None]:
# Try out our attacks on the first test image.
# pgd_untargeted must accept the clip_min / clip_max keyword arguments for this call to work.
original = test_dataset[0][0].unsqueeze(0)  # add a leading batch dimension
adv = pgd_untargeted(
    model,
    original,
    label=7,
    k=10,
    eps=0.08,
    eps_step=0.05,
    clip_min=0,
    clip_max=1.0,
)
show(original, adv, model_to_prob)