# Test dataset

## MNIST Dataset

In this file, we aim to get 4 sets of examples to test our metric from the MNIST dataset:
- Well-classified examples
- Wrongly classified examples
- Adversarial examples (well classified examples modified to be wrongly classified)
- Original examples of the adversarial examples before attack

## ResNet
For the ResNet, we also aim to prepare the same 4 datasets, but with example images extracted from online sources. However we won't prepare 4 datasets with 10 examples each as finding images that were wrongly classified would be a lengthy process


By running this file you should get a file called `metric_testing_dataset.pkl` and a file called `resnet_metric_testing_dataset.pkl`, which is a pickle file of a dictionary `dict[str, np.ndarray]`, with the examples to be used to test the metric.\
These files should be copied into `./tests/data/metric_testing_dataset.pkl` and `./tests/data/resnet_metric_testing_dataset.pkl` to be used for testing in the project

In [1]:
# Num of examples to save for each set
NUM_EXAMPLES = 10

import numpy as np
import random
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.models as models
from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier
from torch.utils.data import DataLoader
from PIL import Image
from torchvision.models import ResNet50_Weights
import pickle
import os

from model import Model, get_mnist_dataset, load_cnn_model

# Helper to stop execution in notebook
def exit():
    class StopExecution(Exception):
        def _render_traceback_(self):
            return []
    raise StopExecution

  from .autonotebook import tqdm as notebook_tqdm


# MNIST Dataset

In [2]:
device = torch.device("cpu") # Keep CPU to not deal with space constraints

model: Model = load_cnn_model()
if model is None:
    print("Could not load model, something went wrong")
    exit()

model.eval()

_, test_dataset = get_mnist_dataset()

In [3]:
test_loader = DataLoader(
    dataset=test_dataset, 
    batch_size=len(test_dataset),
    shuffle=False
)

X_test, y_test = next(iter(test_loader))

In [4]:
model.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

In [5]:
# Classify test dataset
pred = model.forward(X_test)
pred = torch.argmax(pred, dim=1)

well_classified = y_test[pred == y_test]

accuracy = (pred == y_test).sum().item() / len(y_test)
print(f"Accuracy on test dataset: {accuracy*100:.2f}% ({len(well_classified)}/{len(y_test)})")

Accuracy on test dataset: 98.95% (9895/10000)


In [6]:
loss = nn.CrossEntropyLoss()
classifier = PyTorchClassifier(
    model=model, 
    loss=loss, 
    input_shape=X_test[0].shape, 
    nb_classes=10,
    device_type='cpu'
)
attack_model = ProjectedGradientDescent(
    estimator=classifier,
    eps=16 / 255 * 784**0.5,
    norm=2,
)

base_k = 512
idxs_chosen = random.sample(range(len(X_test)), k=base_k)
x_original: np.ndarray = (X_test[idxs_chosen]).numpy()
y_original: np.ndarray = (y_test[idxs_chosen]).numpy()

x_adv = attack_model.generate(x_original)


                                                              

In [7]:
y_adv: np.ndarray = torch.argmax(
    model(torch.from_numpy(x_adv)), dim=1
).numpy()

adv_well_classified = x_adv[y_adv == y_original]
accuracy = (y_adv == y_original).sum().item() / len(y_original)
print(f"Accuracy on adversarial examples: {accuracy*100:.2f}% ({len(adv_well_classified)}/{base_k})")
print(f"Effectiveness of attack (1-accuracy): {(1 - accuracy)*100:.2f}% ")

# Now Cho

Accuracy on adversarial examples: 91.80% (470/512)
Effectiveness of attack (1-accuracy): 8.20% 


Now from the successfuly images that got attacked, choose 10 examples
and from the original dataset choose 10 TP and 10 wrongly classified

In [8]:
successful_adv_examples = x_adv[y_adv != y_original]
well_classified = x_original[y_adv == y_original]
wrongly_classified: np.ndarray = X_test[pred != y_test].numpy()

print(f"Got {len(successful_adv_examples)} successful adversarial examples")

chosen_adv_examples_idxs = random.sample(range(len(successful_adv_examples)), k=NUM_EXAMPLES)
chosen_well_classified_idxs = random.sample(range(len(well_classified)), k=NUM_EXAMPLES)
chosen_wrongly_classified_idxs = random.sample(range(len(wrongly_classified)), k=NUM_EXAMPLES)

final_dataset = {
    "adv_examples": {
        "x": successful_adv_examples[chosen_adv_examples_idxs], 
        "y": y_adv[chosen_adv_examples_idxs]
    },
    "original_adv_example": {
        "x": x_original[chosen_adv_examples_idxs], 
        "y": y_original[chosen_adv_examples_idxs]
    },
    "well_classified": {
        "x": well_classified[chosen_well_classified_idxs], 
        "y": y_original[chosen_well_classified_idxs]
    },
    "wrongly_classified": {
        "x": wrongly_classified[chosen_wrongly_classified_idxs],
        "y": y_test[chosen_wrongly_classified_idxs].numpy()
    }
}
import pickle
pickle.dump(final_dataset, open("metric_testing_dataset.pkl", "wb"))

Got 42 successful adversarial examples


# ResNet Dataset

In [9]:
# Get all image file paths from the existing 'dataset' directory
image_files = []
dataset_dir = 'data/ResNet'
if not os.path.isdir(dataset_dir):
    print(f"Error: The '{dataset_dir}' directory does not exist. Please create it and add images.")
    exit()

for image in os.listdir(dataset_dir):
    if image.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_files.append(os.path.join(dataset_dir, image))

print(f"Found {len(image_files)} images.")

Found 107 images.


In [10]:
weights = ResNet50_Weights.IMAGENET1K_V2
preprocess = weights.transforms()

# Prepare dataset for ResNet
X_resnet = []
y_resnet = []
image_paths = []

# Get the mapping from category name to index
categories = weights.meta["categories"]
category_to_idx = {category: i for i, category in enumerate(categories)}

for image_path in image_files:
    try:
        img = Image.open(image_path).convert("RGB")
        processed_img = preprocess(img).unsqueeze(0)

        basename = os.path.basename(image_path)
        label_name = " ".join(
            basename.split('_Image')[0].split("_")
        )

        if label_name in category_to_idx:
            y_resnet.append(category_to_idx[label_name])
            X_resnet.append(processed_img)
            image_paths.append(image_path)

    except Exception as e:
        print(f"Could not process {image_path}: {e}")

X_resnet = torch.cat(X_resnet)
y_resnet = torch.tensor(y_resnet)

print(f"Loaded {len(X_resnet)} images for ResNet.")

resnet_model = models.resnet50(weights=weights)
resnet_model.eval()

with torch.no_grad():
    pred_resnet = resnet_model(X_resnet).argmax(dim=1)

accuracy_resnet = (pred_resnet == y_resnet).sum().item() / len(y_resnet)
print(f"Accuracy on ResNet dataset: {accuracy_resnet*100:.2f}%")


np_X_resnet = X_resnet.numpy()
np_y_resnet = y_resnet.numpy()

Loaded 107 images for ResNet.
Accuracy on ResNet dataset: 94.39%


In [11]:
well_classified_mask = pred_resnet.numpy() == np_y_resnet
wrongly_classified_mask = ~well_classified_mask

well_classified_resnet_x = np_X_resnet[well_classified_mask]
well_classified_resnet_y = np_y_resnet[well_classified_mask]

wrongly_classified_resnet_x = np_X_resnet[wrongly_classified_mask]
wrongly_classified_resnet_y = np_y_resnet[wrongly_classified_mask]

loss_resnet = nn.CrossEntropyLoss()
classifier_resnet = PyTorchClassifier(
    model=resnet_model,
    loss=loss_resnet,
    input_shape=X_resnet[0].shape,
    nb_classes=1000, # ImageNet has 1000 classes
    device_type='cpu'
)

# Setup the attack
attack_resnet = ProjectedGradientDescent(
    estimator=classifier_resnet,
    eps=8/255,
    norm=np.inf,
    max_iter=20,
    eps_step=2/255,
    batch_size=4
)


x_original_resnet = well_classified_resnet_x[NUM_EXAMPLES:NUM_EXAMPLES*2]
y_original_resnet = well_classified_resnet_y[NUM_EXAMPLES:NUM_EXAMPLES*2]
x_adv_resnet = attack_resnet.generate(x=x_original_resnet)
y_adv_resnet = classifier_resnet.predict(x_adv_resnet).argmax(axis=1)

successful_attacks = y_adv_resnet != y_original_resnet
print(f"ResNet attack success rate: {np.sum(successful_attacks) / len(y_original_resnet) * 100:.2f}%")

adv_examples_resnet_x = x_adv_resnet[successful_attacks]
adv_examples_resnet_y = y_adv_resnet[successful_attacks]

original_adv_examples_resnet_x = x_original_resnet[successful_attacks]
original_adv_examples_resnet_y = y_original_resnet[successful_attacks]

                                                            

ResNet attack success rate: 80.00%


In [12]:
wrongly_classified_indices = np.random.choice(
    len(wrongly_classified_resnet_x),
    min(NUM_EXAMPLES, len(wrongly_classified_resnet_x)),
    replace=False
)
adv_indices = np.random.choice(
    len(adv_examples_resnet_x),
    min(NUM_EXAMPLES, len(adv_examples_resnet_x)),
    replace=False
)

final_resnet_dataset = {
    "well_classified": {
        "x": well_classified_resnet_x[:NUM_EXAMPLES],
        "y": well_classified_resnet_y[:NUM_EXAMPLES]
    },
    "wrongly_classified": {
        "x": wrongly_classified_resnet_x[wrongly_classified_indices],
        "y": wrongly_classified_resnet_y[wrongly_classified_indices]
    },
    "adv_examples": {
        "x": adv_examples_resnet_x[adv_indices],
        "y": adv_examples_resnet_y[adv_indices]
    },
    "original_adv_example": {
        "x": original_adv_examples_resnet_x[adv_indices],
        "y": original_adv_examples_resnet_y[adv_indices]
    }
}

# Save the datasets
pickle.dump(final_resnet_dataset, open("resnet_metric_testing_dataset.pkl", "wb"))

print("ResNet metric testing datasets saved.")
print("ResNet Shapes:", {k: v['x'].shape for k, v in final_resnet_dataset.items()})

ResNet metric testing datasets saved.
ResNet Shapes: {'well_classified': (10, 3, 224, 224), 'wrongly_classified': (6, 3, 224, 224), 'adv_examples': (8, 3, 224, 224), 'original_adv_example': (8, 3, 224, 224)}


In [13]:
print("Verifying the contents of the final ResNet dataset:")

with torch.no_grad():
    for name, data in final_resnet_dataset.items():
        print(f"\n--- {name.replace('_', ' ').title()} ---")

        x_sample = torch.from_numpy(data['x'])
        y_true = data['y']
        y_pred = resnet_model(x_sample).argmax(dim=1).numpy()

        accuracy = np.sum(y_pred == y_true) / len(y_true)
        
        print(f"Labels (True):      {y_true}")
        print(f"Labels (Predicted): {y_pred}")
        print(f"Accuracy: {accuracy * 100:.2f}%")

        if name == "well_classified":
            print("Expectation: All should be well-classified (High Accuracy).")
        elif name == "wrongly_classified":
            print("Expectation: These were originally wrongly-classified by the model.")
        elif name == "adv_examples":
            print("Expectation: Accuracy should be high, as 'y' is the adversarial label.")
        elif name == "original_adv_example":
            print("Expectation: Accuracy should be high, as these were well-classified before the attack.")

Verifying the contents of the final ResNet dataset:

--- Well Classified ---
Labels (True):      [949 949 949 949 949 949 949 949 292 342]
Labels (Predicted): [949 949 949 949 949 949 949 949 292 342]
Accuracy: 100.00%
Expectation: All should be well-classified (High Accuracy).

--- Wrongly Classified ---
Labels (True):      [288 288 288 288 817 836]
Labels (Predicted): [289 290 289 289 609 837]
Accuracy: 0.00%
Expectation: These were originally wrongly-classified by the model.

--- Adv Examples ---
Labels (True):      [654 817 232 781 285 324 588 517]
Labels (Predicted): [654 817 232 781 285 324 588 517]
Accuracy: 100.00%
Expectation: Accuracy should be high, as 'y' is the adversarial label.

--- Original Adv Example ---
Labels (True):      [779 779 294 931 779 779 463 779]
Labels (Predicted): [779 779 294 931 779 779 463 779]
Accuracy: 100.00%
Expectation: Accuracy should be high, as these were well-classified before the attack.
