# Adversarial Training

In [2]:
import numpy as np
import torch.nn as nn
from torch import optim
import torchvision.utils
from torchvision import models, datasets
import torchvision.transforms as transforms
from torchattacks import *
import torchattacks
from torchattacks import PGD, FGSM
from py.train import train_on_attack
import torch
import matplotlib.pyplot as plt
from py.train import test
from py.utils import get_default_device, plot_training_loss
from algo.variable_optim import VSGD
import pandas as pd
from py import nets

%matplotlib inline

n_epochs = 6
batch_size = 128
learning_rate = 0.01
momentum=0.9

# Load Data

In [3]:
transform = transforms.Compose([transforms.ToTensor(),
            ])


train_set = datasets.MNIST('./datasets', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = datasets.MNIST('./datasets', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Set Up Model

In [4]:
from py.utils import get_default_device
from py import nets

device = get_default_device()

base_model = nets.Conv1()
base_model.to(device)
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(base_model.parameters(), lr=learning_rate, momentum=momentum)

# Save the default weights
#torch.save(base_model.state_dict(), "./models/default_base_model.pt")

# Select an attack for base model

In [5]:
#atk = PGD(base_model, eps=0.3, alpha=0.1, steps=7)
atk = FGSM(base_model, eps=8 / 255)
#atk = CW(base_model, c=5, lr=0.001)

# Train model on adversarial examples from the MNIST

In [6]:
print("Training on adversarial examples")
train_counter, train_losses = train_on_attack(base_model, loss, optimizer, train_loader, device, atk, n_epochs=5)

Training on adversarial examples


  0%|          | 0/2345 [00:00<?, ?it/s]

# Test base model on clean MNIST

In [7]:
standard_acc = test(base_model, test_loader, device, classes)
print(f"Standard accuracy: {standard_acc}")

  0%|          | 0/79 [00:00<?, ?it/s]

Standard accuracy: 99.07


# Set up attacks

In [8]:
from py.attacks import get_attacks
atks = get_attacks(base_model)

# Adversarial training and testing on base model

In [None]:
### NOTE: Long training time ###

from py.train import train_on_attack
from py.train import test_on_attack

data_points = []
robust_acc = dict()

for atk in atks:
    atk_name = atk.__class__.__name__
    print(f"Evaluating on {atk_name}")

    #data_points.append(data)
    
    # Test on adversarial examples
    print(f"Testing on adversarial images produced by attack {atk_name}")
    acc = test_on_attack(base_model, test_loader, device, classes, atk)
    print("\n")
    robust_acc[atk_name] = acc

Evaluating on FGSM
Testing on adversarial images produced by attack FGSM


  0%|          | 0/79 [00:00<?, ?it/s]

Robust accuracy for FGSM: 26.68


Evaluating on PGD
Testing on adversarial images produced by attack PGD


  0%|          | 0/79 [00:00<?, ?it/s]

Robust accuracy for PGD: 0.68


Evaluating on CW
Testing on adversarial images produced by attack CW


  0%|          | 0/79 [00:00<?, ?it/s]

# Plot training data

In [None]:
%matplotlib inline
print(robust_acc)

#pd.DataFrame(robust_acc).plot(kind='bar', color=['r', 'b', 'g'])
plt.bar(*zip(*robust_acc.items()))
plt.ylabel('CE')
plt.xlabel('corruption')
plt.title(f"base model accuracy after adv. training with {atk.__class__.__name__}")
plt.savefig('./plots/mnist_corruption_errors.png', dpi=300, bbox_inches='tight')
plt.show()

# Set up VSGD Model

In [16]:
variability = 0.01

from py.utils import get_default_device
from algo.variable_optim import VSGD

device = get_default_device()

v_model = nets.Conv1()
v_model.to(device)
criterion = nn.CrossEntropyLoss()
v_optim = VSGD(v_model.parameters(), lr=learning_rate, momentum=momentum)

# Save the default weights
#torch.save(base_model.state_dict(), "./models/default_v_model.pt")
#atk = PGD(base_model, eps=0.3, alpha=0.1, steps=7)
#atk = FGSM(v_model, eps=8 / 255)
atk = PGD(v_model, eps=0.3, alpha=0.1, steps=7)
#atk = CW(v_model)
print(device)

cuda


# Train VSGD Model on adversarial examples

In [17]:
train_counter, train_losses = train_on_attack(v_model, criterion, v_optim, train_loader, device, atk, n_epochs=5)

  0%|          | 0/2345 [00:00<?, ?it/s]

# Test on clean MNIST

In [42]:
standard_acc = test(v_model, test_loader, device, classes)
print(f"Standard accuracy: {standard_acc}")

  0%|          | 0/79 [00:00<?, ?it/s]

Standard accuracy: 79.83


# Adversarial Testing on VGSD Model

In [43]:
### NOTE: Long training time ###

from py.train import train_on_attack, test_on_attack

print("Evaluating VSGD model with adversarial training")

robust_acc = dict()

for atk in atks:
    atk_name = atk.__class__.__name__
    print(f"Evaluating on {atk_name}")

    #data_points.append(data)
    
    # Test on adversarial examples
    print(f"Testing with attack {atk_name}")
    acc = test_on_attack(v_model, test_loader, device, classes, atk)
    print("\n")
    robust_acc[atk_name] = acc

Evaluating VSGD model with adversarial training
Evaluating on FGSM
Testing with attack FGSM


  0%|          | 0/79 [00:00<?, ?it/s]

Robust accuracy for FGSM: 99.15


Evaluating on PGD
Testing with attack PGD


  0%|          | 0/79 [00:00<?, ?it/s]

Robust accuracy for PGD: 99.38


Evaluating on CW
Testing with attack CW


  0%|          | 0/79 [00:00<?, ?it/s]

Robust accuracy for CW: 79.29


