# *Baseline* and *Mitigation* Models - Testing

In [1]:
# !pip install adversarial-robustness-toolbox torch matplotlib numpy

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier
import numpy as np
import matplotlib.pyplot as plt
import os

In [14]:
# defining model architectures

class SimpleCNN(nn.Module):
    def __init__(self, num_classes = 10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64,3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim = 1)
        return output
    
# defining ResNet component adapted for MNIST 1 * 28 * 28

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size = 3, stride = stride, padding = 1, bias = False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size = 3, stride = 1, padding = 1, bias = False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size = 1, stride = stride, bias = False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        
        return out
    
# defining model architectures

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes = 10): # specifically for MNIST
        super(ResNet, self).__init__()
        self.in_planes = 64

        # 1 input channel for MNIST
        self.conv1 = nn.Conv2d(1, 64, kernel_size = 3, stride = 1, padding = 1, bias = False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride = 1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride = 2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride = 2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride = 2)
        
        # final linear layer adapted for 28 * 28 images after pooling
        self.linear = nn.Linear(512 * block.expansion * 4 * 4, num_classes) 

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        layers.append(block(self.in_planes, planes, stride))
        self.in_planes = planes * block.expansion
        for i in range(1, num_blocks):
            layers.append(block(self.in_planes, planes, stride = 1))
            
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # average pooling for 28 * 28 input results in 3 * 3 feature map before flatten
        out = F.avg_pool2d(out, 1) 
        out = out.view(out.size(0), -1)
        out = self.linear(out)

        return F.log_softmax(out, dim = 1)
        
def ResNet18(num_classes = 10):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)

In [4]:
# checking for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [19]:
MODEL_PATHS = {
    "baseline_cnn": "../models/baseline_model_cnn.pth",
    "mitigation_cnn": "../models/mitigation_model_cnn.pth",
    "detection_cnn": "../models/detection_model_cnn.pth",
    "baseline_resnet": "../models/baseline_model_resnet.pth",
    "mitigation_resnet": "../models/mitigation_model_resnet.pth",
    "detection_resnet": "../models/detection_model_resnet.pth",
}

In [6]:
# preparing the test data

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
test_dataset = datasets.MNIST(root = './data', train = False, download = True, transform = transform)
test_loader_clean = DataLoader(test_dataset, batch_size = 1, shuffle = False)

100%|██████████| 9.91M/9.91M [01:19<00:00, 124kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 77.0kB/s]
100%|██████████| 1.65M/1.65M [00:04<00:00, 362kB/s] 
100%|██████████| 4.54k/4.54k [00:00<00:00, 1.31MB/s]


In [7]:
baseline_model_for_art = SimpleCNN(num_classes = 10).to(device)
try:
    baseline_model_for_art.load_state_dict(torch.load(MODEL_PATHS["baseline_cnn"], map_location = device))
    baseline_model_for_art.eval()
except:
    print("Warning: Could not load CNN baseline model; initializing without saved weights")

In [8]:
# generating an adversial attack

ADVERSARIAL_DATASET_FILE = "adv_test_dataset_pgd015.npy"

baseline_classifier_art = PyTorchClassifier(
    model = baseline_model_for_art,
    loss = nn.CrossEntropyLoss(),
    input_shape = (1, 28, 28),
    nb_classes = 10,
    device_type = device
)

if os.path.exists(ADVERSARIAL_DATASET_FILE):
    print(f"Loading existing adversarial dataset from {ADVERSARIAL_DATASET_FILE}...")
    x_test_adv_np = np.load(ADVERSARIAL_DATASET_FILE)
else:
    print(f"Creating Adversarial dataset with PGD Epsilon = 0.15")
    pgd_attack = ProjectedGradientDescent(baseline_classifier_art, eps = 0.15, eps_step = 0.01, max_iter = 40)

    x_test_np = test_dataset.data.numpy().reshape(-1, 1, 28, 28).astype(np.float32) / 255.0
    y_test_np = test_dataset.targets.numpy()

    x_test_adv_np = pgd_attack.generate(x = x_test_np, y = y_test_np)
    np.save(ADVERSARIAL_DATASET_FILE, x_test_adv_np)
    print(f"Adversarial dataset saved to {ADVERSARIAL_DATASET_FILE}")

y_test_np = test_dataset.targets.numpy()
x_test_adv_tensor = torch.from_numpy(x_test_adv_np).to(device)
adv_test_dataset = TensorDataset(x_test_adv_tensor, torch.from_numpy(y_test_np).long().to(device))
adv_test_loader = DataLoader(adv_test_dataset, batch_size = 128, shuffle = False)
print("Adversarial test loader created") 

Creating Adversarial dataset with PGD Epsilon = 0.15


PGD - Batches:   0%|          | 0/313 [00:00<?, ?it/s]



Adversarial dataset saved to adv_test_dataset_pgd015.npy
Adversarial test loader created


In [9]:
# evaluation function
def eval_model(model, loader, description, is_detection = False):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim = 1, keepdim = True)
            if is_detection:
                correct += pred.eq(target.view_as(pred)).sum().item()
            else:
                correct += pred.eq(target.view_as(pred)).sum().item()
            total += data.size(0)
    accuracy = 100.0 * correct / total
    print(f"{description} Accuracy: {accuracy:.2f}%")
    
    return accuracy

In [10]:
# model initialization and loading

def load_model(architecture, num_classes, path):
    model = architecture(num_classes).to(device)
    try:
        model.load_state_dict(torch.load(path, map_location = device))
        print(f"Loaded {architecture.__name__} from {path}")
        return model
    except Exception as e:
        print(f"Error: Could not load {architecture.__name__} from {path}, skipping; error: {e}")
        return None

In [20]:
baseline_model_cnn = load_model(SimpleCNN, 10, MODEL_PATHS["baseline_cnn"])
mitigation_model_cnn = load_model(SimpleCNN, 10, MODEL_PATHS["mitigation_cnn"])

baseline_model_resnet = load_model(ResNet18, 10, MODEL_PATHS["baseline_resnet"])
mitigation_model_resnet = load_model(ResNet18, 10, MODEL_PATHS["mitigation_resnet"])

# detection_model_cnn = load_model(SimpleCNN, 2, MODEL_PATHS['detection_cnn'])
# detection_model_resnet = load_model(ResNet18, 2, MODEL_PATHS['detection_resnet'])


Loaded SimpleCNN from ../models/baseline_model_cnn.pth
Loaded SimpleCNN from ../models/mitigation_model_cnn.pth
Loaded ResNet18 from ../models/baseline_model_resnet.pth
Loaded ResNet18 from ../models/mitigation_model_resnet.pth


In [21]:
# performance evaluation

print("Perfomance Evaluation")

if baseline_model_cnn and mitigation_model_cnn:
    eval_model(baseline_model_cnn, test_loader_clean, "Baseline CNN (Clean Data)")
    eval_model(baseline_model_cnn, adv_test_loader, "Baseline CNN (Adversarial Data)")
    
    eval_model(mitigation_model_cnn, test_loader_clean, "Mitigation CNN (Clean Data)")
    eval_model(mitigation_model_cnn, adv_test_loader, "Mitigation CNN (Adversarial Data)")

Perfomance Evaluation
Baseline CNN (Clean Data) Accuracy: 99.02%
Baseline CNN (Adversarial Data) Accuracy: 2.05%


KeyboardInterrupt: 

In [22]:
if baseline_model_resnet and mitigation_model_resnet:
    eval_model(baseline_model_resnet, test_loader_clean, "Baseline ResNet (Clean Data)")
    eval_model(baseline_model_resnet, adv_test_loader, "Baseline ResNet (Adversarial Data)")
    
    eval_model(mitigation_model_resnet, test_loader_clean, "Mitigation ResNet (Clean Data)")
    eval_model(mitigation_model_resnet, adv_test_loader, "Mitigation ResNet (Adversarial Data)")

Baseline ResNet (Clean Data) Accuracy: 98.89%
Baseline ResNet (Adversarial Data) Accuracy: 9.74%
Mitigation ResNet (Clean Data) Accuracy: 11.35%
Mitigation ResNet (Adversarial Data) Accuracy: 11.35%
