# A Test Notebook to Draft Adversarial attack experiments

In [59]:
from PIL import Image
import torch
import torch.nn as nn
import tensorflow as tf
import numpy as np
import pandas as pd

In [60]:
import timm
# Load pretrained PyTorch feature extractor
model = timm.create_model(
    'resnet18.a1_in1k',
    pretrained=True,
    num_classes=0,  # remove classifier nn.Linear
)
feature_extractor = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

In [61]:
import pickle

# Load test data from pickle file
with open('W:/DS/Project/CNN Prototype/cifar-10-batches-py/test_batch', 'rb') as f:  # Replace with actual path
    data = pickle.load(f, encoding='bytes')

BATCH_NUMBER = 200

def load_batch(data, BATCH_NUMBER = BATCH_NUMBER, iteration = 0):
    # Handle different data formats
    if isinstance(data, dict):
        x_test = data[b'data'][iteration*BATCH_NUMBER:(iteration + 1)*BATCH_NUMBER]
        x_test = [Image.fromarray(image.reshape(3,32,32).transpose(1,2,0)) for image in x_test]
        y_test = data[b'labels'][iteration*BATCH_NUMBER:(iteration + 1)*BATCH_NUMBER]
    else:
        x_test, y_test = data

    y_test = pd.get_dummies(y_test).values

    # Convert to PyTorch tensors for the feature extractor
    x_test_pt = torch.stack([transforms(image) for image in x_test])  # NHWC to NCHW
    y_test_pt = torch.tensor(y_test).float()
    return x_test_pt, y_test_pt


In [62]:
# FGSM Attack Function
def fgsm_attack(input_image, label, epsilon, classifier):
    input_image.requires_grad = True  # Enable gradient tracking

    # Step 1: Extract features using PyTorch
    features = feature_extractor(input_image)
    features_np = features.detach().cpu().numpy()

    # Step 2: Forward pass through TensorFlow classifier
    with tf.GradientTape() as tape:
        features_tf = tf.convert_to_tensor(features_np)
        tape.watch(features_tf)
        predictions = classifier(features_tf)
        loss = tf.keras.losses.categorical_crossentropy(label, predictions)

    # Step 3: Backpropagate to get gradient w.r.t features
    grad_features = tape.gradient(loss, features_tf).numpy()

    # Step 4: Chain rule to get gradient w.r.t input image
    grad_features_pt = torch.tensor(grad_features).to(input_image.device)
    features.backward(grad_features_pt)
    grad_input = input_image.grad.data

    # Step 5: Apply FGSM perturbation
    perturbation = epsilon * grad_input.sign()
    adversarial_image = input_image + perturbation

    return adversarial_image

# Evaluate on Adversarial Examples
def evaluate_adversarial(epsilon=0.01, classifiers = None, num_samples=BATCH_NUMBER, classifiers_names = None):
    correct = np.zeros(len(classifiers_names))
    adv_correct = np.zeros(len(classifiers_names))
    for i in range(int(10000 / num_samples)):
        x_test_pt, y_test_pt = load_batch(data, BATCH_NUMBER = BATCH_NUMBER, iteration = i)
        for j in range(num_samples):
            image = x_test_pt[j:j+1]
            label = y_test_pt[j:j+1]
            # Forward pass through both models
            features = feature_extractor(image).detach().cpu().numpy()
            for k,classifier in enumerate(classifiers):
                pred = classifier.predict(features)

                adv_image = fgsm_attack(image.clone(), label, epsilon, classifier = classifier)

                adv_features = feature_extractor(adv_image).detach().cpu().numpy()
                adv_pred = classifier.predict(adv_features)

                if np.argmax(pred) == np.argmax(label):
                    correct[k] += 1
                if np.argmax(adv_pred) == np.argmax(label):
                    adv_correct[k] += 1
    accuracy = correct / 10000
    adv_accuracy = adv_correct / 10000
    for k,classifier in enumerate(classifiers):
        print('Baseline Accuracy for ' + classifiers_names[k] + f' (ε = {epsilon}): {accuracy[k] * 100:.2f}%')
        print('Adversarial Accuracy for ' + classifiers_names[k] + f' (ε = {epsilon}): {adv_accuracy[k] * 100:.2f}%')
    return accuracy, adv_accuracy

MP

In [None]:
# Load TensorFlow classifier
neurons = ['MP','RBF','ECF']
MP_classifier = tf.keras.models.load_model('W:/DS/Project/CNN Experiment/ResNet18/CIFAR10/CIFAR10_ResNet18_MP_epochs_20.keras')  # Replace with actual path
RBF_classifier = tf.keras.models.load_model('W:/DS/Project/CNN Experiment/ResNet18/CIFAR10/CIFAR10_ResNet18_RBF_epochs_20.keras')  # Replace with actual path
ECF_classifier = tf.keras.models.load_model('W:/DS/Project/CNN Experiment/ResNet18/CIFAR10/CIFAR10_ResNet18_ENN_epochs_20.keras')  # Replace with actual path
classifiers = [MP_classifier,RBF_classifier,ECF_classifier]
# Run evaluation
evaluate_adversarial(epsilon=0.007, classifiers = classifiers, classifiers_names = neurons)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17