# *Detection* Model - Training

In [1]:
!pip install adversarial-robustness-toolbox torch matplotlib numpy

Collecting adversarial-robustness-toolbox
  Downloading adversarial_robustness_toolbox-1.20.1-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier
import numpy as np

In [3]:
# defining model architecture

class SimpleCNN(nn.Module):
    def __init__(self, num_classes = 10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64,3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim = 1)
        return output

In [11]:
# defining training training hyperparameters

EPOCHS_DETECTION = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPSILON = 0.09 # pertuberation size for PGD attack

# checking for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
# loading data

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
test_dataset = datasets.MNIST(root = '/kaggle/working/', train = True, download = True, transform = transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 478kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.48MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.91MB/s]


In [6]:
# helper function for attack generation

def generate_adversarial_data(classifier, x_data, y_data, attack_method):
    print(f"Generating adversarial examples using {attack_method.__class__.__name__}...")
    x_adv = attack_method.generate(x = x_data, y = y_data)
    return x_adv

In [7]:
# training phase - detection model

# need to upload the "baseline_model.pth" file to the
# working directory `/kaggle/working` before running this cell

baseline_model = SimpleCNN().to(device)
baseline_model.load_state_dict(torch.load('/kaggle/input/baseline_model/other/default/1/baseline_model.pth'))
baseline_model.eval()

baseline_classifer_art = PyTorchClassifier(model = baseline_model,
                                           loss = nn.CrossEntropyLoss(),
                                           input_shape = (1, 28, 28),
                                           nb_classes = 10,
                                           device_type = "gpu")

pgd_attack = ProjectedGradientDescent(baseline_classifer_art, eps = EPSILON)

In [8]:
# generating training dataset for detection

# using the test dataset to create the adversarial examples

x_test_np = test_dataset.data.numpy().reshape(-1, 1, 28, 28).astype(np.float32) / 255.0
y_test_np = test_dataset.targets.numpy()
x_adv_np = generate_adversarial_data(baseline_classifer_art, x_test_np, y_test_np, pgd_attack)

x_detection_tensor = torch.from_numpy(np.concatenate((x_test_np, x_adv_np), axis = 0))
y_detection_tensor = torch.from_numpy(np.concatenate((np.zeros(len(x_test_np)), np.ones(len(x_adv_np))), axis = 0)).long()
detection_dataset = TensorDataset(x_detection_tensor, y_detection_tensor)
detection_loader = DataLoader(detection_dataset, batch_size = BATCH_SIZE, shuffle = True)

print(f"Detection dataset generated of size: {len(detection_dataset)}")

Generating adversarial examples using ProjectedGradientDescent...


PGD - Batches:   0%|          | 0/1875 [00:00<?, ?it/s]



Detection dataset generated of size: 120000


In [None]:
detection_model = SimpleCNN(num_classes = 2).to(device)
detector_optimizer = optim.Adam(detection_model.parameters(), lr = LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

print("Detection model training started...")
for epoch in range(1, EPOCHS_DETECTION + 1):
    detection_model.train()
    for data, target in detection_loader:
        data, target = data.to(device), target.to(device)
        detector_optimizer.zero_grad()
        output = detection_model(data)
        loss = loss_fn(output, target)
        loss.backward()
        detector_optimizer.step()
    
    print(f"Epoch {epoch}/{EPOCHS_DETECTION}, Loss: {loss.item()}")

torch.save(detection_model.state_dict(), 'detection_model.pth')
print("Detection model saved as 'detection_model.pth'")

Detection model training started...
Epoch 1/10, Loss: 2.70073405772564e-06
Epoch 2/10, Loss: 2.7546250294108177e-06
Epoch 3/10, Loss: 1.303851426825986e-08
Epoch 4/10, Loss: 0.0
Epoch 5/10, Loss: 1.8626450382086546e-09
Epoch 6/10, Loss: 2.4214367400077208e-08
Epoch 7/10, Loss: 2.6596324005367933e-06
Epoch 8/10, Loss: 8.940671136770106e-08
Epoch 9/10, Loss: 0.0
Epoch 10/10, Loss: 0.0
Detection model saved as 'detector_model.pth'
