In [1]:
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from art.attacks.fast_gradient import FastGradientMethod
from art.classifiers.pytorch import PyTorchClassifier
from art.utils import load_mnist

In [6]:
# Step 0: Define the neural network model; its forward method returns raw logits (no softmax is applied)

class Net(nn.Module):
    """Small convolutional classifier that returns 10 raw logits per sample.

    The network is two conv stages followed by two fully connected layers.
    Shape comments below assume a (batch, 1, 28, 28) input, which is what the
    ``32*7*7`` input width of ``fc1`` requires.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d(5x5) -> ReLU -> MaxPool2d(2) stage.

        With stride=1, padding=(kernel_size-1)/2 = 2 keeps height and width
        unchanged through the convolution; the pooling layer then halves them.
        """
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,    # number of input channels
                out_channels=out_channels,  # number of convolution kernels
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self):
        super().__init__()
        # (1,28,28) -> conv (16,28,28) -> pool (16,14,14)
        self.conv1 = self._conv_stage(1, 16)
        # (16,14,14) -> conv (32,14,14) -> pool (32,7,7)
        self.conv2 = self._conv_stage(16, 32)
        # Hidden fully connected layer on the flattened feature maps.
        self.fc1 = nn.Sequential(
            nn.Linear(32*7*7, 500),
            nn.ReLU()
        )
        # Output layer: one logit per class, no activation.
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Run the network on a batch and return the (batch, 10) logits."""
        for stage in (self.conv1, self.conv2):
            x = stage(x)                    # ends at (batch,32,7,7)
        flat = x.view(x.size(0), -1)        # (batch,32*7*7)
        return self.fc2(self.fc1(flat))

In [40]:
# Step 1: Load the MNIST dataset

# load_mnist() returns the train split, the test split and the pixel value bounds.
(
    (x_train, y_train),
    (x_test, y_test),
    min_pixel_value,
    max_pixel_value,
) = load_mnist()

In [41]:
# Show the pixel bounds that came with the dataset (used later as clip_values).
for _label, _value in (
    ('min_pixel_value: ', min_pixel_value),
    ('max_pixel_value: ', max_pixel_value),
):
    print(_label, _value)

min_pixel_value:  0.0
max_pixel_value:  1.0


In [42]:
# Step 1a: Swap axes to PyTorch's NCHW format


def _as_nchw_float32(images, sample_shape=(1, 28, 28)):
    """Return ``images`` as a float32 array in NCHW layout.

    The transpose is skipped when ``images`` already has ``sample_shape`` as
    its per-sample shape, so re-running this cell leaves the data unchanged
    instead of permuting the axes a second time. Any other input is
    transposed with axes (0, 3, 1, 2).

    Args:
        images: 4-D array; assumed to arrive as (N, H, W, C) from the loader
            on the first run — TODO confirm against load_mnist.
        sample_shape: per-sample (C, H, W) shape that marks the data as
            already converted.

    Returns:
        A float32 copy of the (possibly transposed) array.
    """
    if tuple(images.shape[1:]) != tuple(sample_shape):
        images = images.transpose(0, 3, 1, 2)
    return images.astype(np.float32)


x_train = _as_nchw_float32(x_train)
x_test = _as_nchw_float32(x_test)

In [43]:
# Step 2: Create the model

# Seed the global random generators before the first stochastic step so that
# Restart & Run All gives the same initial weights and, as far as the
# libraries allow, the same accuracy figures on every run.
# NOTE(review): the training shuffle inside ART is assumed to draw from one of
# these generators — confirm for the installed ART version.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

model = Net()

In [44]:
# Step 2a: Define the loss function and the optimizer

# Cross-entropy is applied to the raw logits the model's forward pass returns.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)

In [45]:
# Step 3: Create the ART classifier

# Wrap the PyTorch model, loss and optimizer in ART's classifier interface;
# clip_values are the dataset's pixel bounds and input_shape is one NCHW sample.
classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

In [46]:
# Step 4: Train the ART classifier

# Three epochs over the training set in mini-batches of 64.
classifier.fit(
    x_train,
    y_train,
    batch_size=64,
    nb_epochs=3,
)

In [47]:
# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
# argmax over axis 1 gives a class index for predictions and labels alike
# (assumes y_test is one-hot encoded); the mean of the element-wise matches
# is the fraction of test samples classified correctly.
accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

Accuracy on benign test examples: 97.8%


In [48]:
# Step 6: Generate adversarial test examples

# Fast Gradient Method against the trained classifier, with eps=0.2.
attack = FastGradientMethod(
    classifier=classifier,
    eps=0.2,
)
# Perturb every test image; labels are not passed to generate().
x_test_adv = attack.generate(x=x_test)

In [49]:
# Step 7: Evaluate the ART classifier on adversarial test examples

predictions = classifier.predict(x_test_adv)
# Same metric as the benign evaluation: fraction of samples whose predicted
# class index equals the index of the label (assumes y_test is one-hot).
accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

Accuracy on adversarial test examples: 13.52%
